{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# KDD Cup 1999 - intrusion detection \n",
    "\n",
    "Task description: http://kdd.ics.uci.edu/databases/kddcup99/task.html\n",
    "\n",
    "A paper that analyzes the dataset: https://web.cs.dal.ca/~zincir/bildiri/pst05-gnm.pdf\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn import model_selection, linear_model, cluster, \\\n",
    "    preprocessing, metrics, pipeline, tree, ensemble, decomposition\n",
    "\n",
    "pd.options.display.max_columns = 1000\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "num_cluster = 30"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land', 'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised', 'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 'num_shells', 'num_access_files', 'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count', 'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate', 'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count', 'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate', 'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate', 'Category']\n"
     ]
    }
   ],
   "source": [
    "columns = [f.split(\":\")[0] for f in \"\"\"\n",
    "duration: continuous.\n",
    "protocol_type: symbolic.\n",
    "service: symbolic.\n",
    "flag: symbolic.\n",
    "src_bytes: continuous.\n",
    "dst_bytes: continuous.\n",
    "land: symbolic.\n",
    "wrong_fragment: continuous.\n",
    "urgent: continuous.\n",
    "hot: continuous.\n",
    "num_failed_logins: continuous.\n",
    "logged_in: symbolic.\n",
    "num_compromised: continuous.\n",
    "root_shell: continuous.\n",
    "su_attempted: continuous.\n",
    "num_root: continuous.\n",
    "num_file_creations: continuous.\n",
    "num_shells: continuous.\n",
    "num_access_files: continuous.\n",
    "num_outbound_cmds: continuous.\n",
    "is_host_login: symbolic.\n",
    "is_guest_login: symbolic.\n",
    "count: continuous.\n",
    "srv_count: continuous.\n",
    "serror_rate: continuous.\n",
    "srv_serror_rate: continuous.\n",
    "rerror_rate: continuous.\n",
    "srv_rerror_rate: continuous.\n",
    "same_srv_rate: continuous.\n",
    "diff_srv_rate: continuous.\n",
    "srv_diff_host_rate: continuous.\n",
    "dst_host_count: continuous.\n",
    "dst_host_srv_count: continuous.\n",
    "dst_host_same_srv_rate: continuous.\n",
    "dst_host_diff_srv_rate: continuous.\n",
    "dst_host_same_src_port_rate: continuous.\n",
    "dst_host_srv_diff_host_rate: continuous.\n",
    "dst_host_serror_rate: continuous.\n",
    "dst_host_srv_serror_rate: continuous.\n",
    "dst_host_rerror_rate: continuous.\n",
    "dst_host_srv_rerror_rate: continuous.\n",
    "\"\"\".split(\"\\n\") if len(f)>0]\n",
    "\n",
    "columns.append(\"Category\")\n",
    "print(columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The file is read without a header row; column names are supplied from `columns`.\n",
    "df = pd.read_csv(\n",
    "    \"/data/kddcup.data\",\n",
    "    names=columns,\n",
    "    header=None,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>duration</th>\n",
       "      <th>protocol_type</th>\n",
       "      <th>service</th>\n",
       "      <th>flag</th>\n",
       "      <th>src_bytes</th>\n",
       "      <th>dst_bytes</th>\n",
       "      <th>land</th>\n",
       "      <th>wrong_fragment</th>\n",
       "      <th>urgent</th>\n",
       "      <th>hot</th>\n",
       "      <th>num_failed_logins</th>\n",
       "      <th>logged_in</th>\n",
       "      <th>num_compromised</th>\n",
       "      <th>root_shell</th>\n",
       "      <th>su_attempted</th>\n",
       "      <th>num_root</th>\n",
       "      <th>num_file_creations</th>\n",
       "      <th>num_shells</th>\n",
       "      <th>num_access_files</th>\n",
       "      <th>num_outbound_cmds</th>\n",
       "      <th>is_host_login</th>\n",
       "      <th>is_guest_login</th>\n",
       "      <th>count</th>\n",
       "      <th>srv_count</th>\n",
       "      <th>serror_rate</th>\n",
       "      <th>srv_serror_rate</th>\n",
       "      <th>rerror_rate</th>\n",
       "      <th>srv_rerror_rate</th>\n",
       "      <th>same_srv_rate</th>\n",
       "      <th>diff_srv_rate</th>\n",
       "      <th>srv_diff_host_rate</th>\n",
       "      <th>dst_host_count</th>\n",
       "      <th>dst_host_srv_count</th>\n",
       "      <th>dst_host_same_srv_rate</th>\n",
       "      <th>dst_host_diff_srv_rate</th>\n",
       "      <th>dst_host_same_src_port_rate</th>\n",
       "      <th>dst_host_srv_diff_host_rate</th>\n",
       "      <th>dst_host_serror_rate</th>\n",
       "      <th>dst_host_srv_serror_rate</th>\n",
       "      <th>dst_host_rerror_rate</th>\n",
       "      <th>dst_host_srv_rerror_rate</th>\n",
       "      <th>Category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>215</td>\n",
       "      <td>45076</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>normal.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>162</td>\n",
       "      <td>4528</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>normal.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>236</td>\n",
       "      <td>1228</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>normal.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>233</td>\n",
       "      <td>2032</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>normal.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>tcp</td>\n",
       "      <td>http</td>\n",
       "      <td>SF</td>\n",
       "      <td>239</td>\n",
       "      <td>486</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>normal.</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   duration protocol_type service flag  src_bytes  dst_bytes  land  \\\n",
       "0         0           tcp    http   SF        215      45076     0   \n",
       "1         0           tcp    http   SF        162       4528     0   \n",
       "2         0           tcp    http   SF        236       1228     0   \n",
       "3         0           tcp    http   SF        233       2032     0   \n",
       "4         0           tcp    http   SF        239        486     0   \n",
       "\n",
       "   wrong_fragment  urgent  hot  num_failed_logins  logged_in  num_compromised  \\\n",
       "0               0       0    0                  0          1                0   \n",
       "1               0       0    0                  0          1                0   \n",
       "2               0       0    0                  0          1                0   \n",
       "3               0       0    0                  0          1                0   \n",
       "4               0       0    0                  0          1                0   \n",
       "\n",
       "   root_shell  su_attempted  num_root  num_file_creations  num_shells  \\\n",
       "0           0             0         0                   0           0   \n",
       "1           0             0         0                   0           0   \n",
       "2           0             0         0                   0           0   \n",
       "3           0             0         0                   0           0   \n",
       "4           0             0         0                   0           0   \n",
       "\n",
       "   num_access_files  num_outbound_cmds  is_host_login  is_guest_login  count  \\\n",
       "0                 0                  0              0               0      1   \n",
       "1                 0                  0              0               0      2   \n",
       "2                 0                  0              0               0      1   \n",
       "3                 0                  0              0               0      2   \n",
       "4                 0                  0              0               0      3   \n",
       "\n",
       "   srv_count  serror_rate  srv_serror_rate  rerror_rate  srv_rerror_rate  \\\n",
       "0          1          0.0              0.0          0.0              0.0   \n",
       "1          2          0.0              0.0          0.0              0.0   \n",
       "2          1          0.0              0.0          0.0              0.0   \n",
       "3          2          0.0              0.0          0.0              0.0   \n",
       "4          3          0.0              0.0          0.0              0.0   \n",
       "\n",
       "   same_srv_rate  diff_srv_rate  srv_diff_host_rate  dst_host_count  \\\n",
       "0            1.0            0.0                 0.0               0   \n",
       "1            1.0            0.0                 0.0               1   \n",
       "2            1.0            0.0                 0.0               2   \n",
       "3            1.0            0.0                 0.0               3   \n",
       "4            1.0            0.0                 0.0               4   \n",
       "\n",
       "   dst_host_srv_count  dst_host_same_srv_rate  dst_host_diff_srv_rate  \\\n",
       "0                   0                     0.0                     0.0   \n",
       "1                   1                     1.0                     0.0   \n",
       "2                   2                     1.0                     0.0   \n",
       "3                   3                     1.0                     0.0   \n",
       "4                   4                     1.0                     0.0   \n",
       "\n",
       "   dst_host_same_src_port_rate  dst_host_srv_diff_host_rate  \\\n",
       "0                         0.00                          0.0   \n",
       "1                         1.00                          0.0   \n",
       "2                         0.50                          0.0   \n",
       "3                         0.33                          0.0   \n",
       "4                         0.25                          0.0   \n",
       "\n",
       "   dst_host_serror_rate  dst_host_srv_serror_rate  dst_host_rerror_rate  \\\n",
       "0                   0.0                       0.0                   0.0   \n",
       "1                   0.0                       0.0                   0.0   \n",
       "2                   0.0                       0.0                   0.0   \n",
       "3                   0.0                       0.0                   0.0   \n",
       "4                   0.0                       0.0                   0.0   \n",
       "\n",
       "   dst_host_srv_rerror_rate Category  \n",
       "0                       0.0  normal.  \n",
       "1                       0.0  normal.  \n",
       "2                       0.0  normal.  \n",
       "3                       0.0  normal.  \n",
       "4                       0.0  normal.  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the first five rows as a quick check of the loaded frame.\n",
    "df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "smurf.              2807886\n",
       "neptune.            1072017\n",
       "normal.              972781\n",
       "satan.                15892\n",
       "ipsweep.              12481\n",
       "portsweep.            10413\n",
       "nmap.                  2316\n",
       "back.                  2203\n",
       "warezclient.           1020\n",
       "teardrop.               979\n",
       "pod.                    264\n",
       "guess_passwd.            53\n",
       "buffer_overflow.         30\n",
       "land.                    21\n",
       "warezmaster.             20\n",
       "imap.                    12\n",
       "rootkit.                 10\n",
       "loadmodule.               9\n",
       "ftp_write.                8\n",
       "multihop.                 7\n",
       "phf.                      4\n",
       "perl.                     3\n",
       "spy.                      2\n",
       "Name: Category, dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows for each raw Category value.\n",
    "df[\"Category\"].value_counts()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Attacks fall into one of four categories: User to Root, Remote to Local, Denial of Service, and Probe.\n",
    "\n",
    " - Denial of Service (dos): The attacker tries to prevent legitimate users from using a service.\n",
    " - Remote to Local (r2l): The attacker does not have an account on the victim machine and tries to gain access.\n",
    " - User to Root (u2r): The attacker has local access to the victim machine and tries to gain superuser privileges.\n",
    " - Probe (probe): The attacker tries to gain information about the target host.\n",
    "\n",
    "The mapping from each raw `Category` value to its attack type is below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "attack_types = {\n",
    " 'normal.': \"normal\", \n",
    " 'buffer_overflow.':'u2r', \n",
    " 'loadmodule.':'u2r', \n",
    " 'perl.':'u2r', \n",
    " 'neptune.':'dos',\n",
    " 'smurf.':'dos',\n",
    " 'guess_passwd.':'r2l', \n",
    " 'pod.': 'dos', \n",
    " 'teardrop.':'dos',\n",
    " 'portsweep.':'probe',\n",
    " 'ipsweep.':'probe',\n",
    " 'land.':'dos',\n",
    " 'ftp_write.':'r2l',\n",
    " 'back.': 'dos',\n",
    " 'imap.': 'r2l',\n",
    " 'satan.': 'probe',\n",
    " 'phf.':'r2l',\n",
    " 'nmap.':'probe',\n",
    " 'multihop.':'r2l',\n",
    " 'warezmaster.':'r2l',\n",
    " 'warezclient.':'r2l',\n",
    " 'spy.':'r2l',\n",
    " 'rootkit.':'u2r'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Binary target: \"normal\" for rows whose Category is \"normal.\", \"attack\" otherwise.\n",
    "df[\"label\"] = np.where(df[\"Category\"] == \"normal.\", \"normal\", \"attack\")\n",
    "\n",
    "# A dict-based Series.map replaces the per-row Python lambda. Unlike dict\n",
    "# indexing, map yields NaN for keys missing from the dict, so check for that\n",
    "# explicitly and keep failing fast with a KeyError on unmapped categories.\n",
    "df[\"attack_type\"] = df[\"Category\"].map(attack_types)\n",
    "if df[\"attack_type\"].isna().any():\n",
    "    raise KeyError(\n",
    "        \"Category values missing from attack_types: {}\".format(\n",
    "            df.loc[df[\"attack_type\"].isna(), \"Category\"].unique().tolist()\n",
    "        )\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "attack    0.80141\n",
       "normal    0.19859\n",
       "Name: label, dtype: float64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Share of all rows carried by each binary label.\n",
    "df[\"label\"].value_counts() / len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dos       3883370\n",
       "normal     972781\n",
       "probe       41102\n",
       "r2l          1126\n",
       "u2r            52\n",
       "Name: attack_type, dtype: int64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# dropna=False so that any row left without an attack type shows up in the counts.\n",
    "df[\"attack_type\"].value_counts(dropna=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>duration</th>\n",
       "      <th>src_bytes</th>\n",
       "      <th>dst_bytes</th>\n",
       "      <th>land</th>\n",
       "      <th>wrong_fragment</th>\n",
       "      <th>urgent</th>\n",
       "      <th>hot</th>\n",
       "      <th>num_failed_logins</th>\n",
       "      <th>logged_in</th>\n",
       "      <th>num_compromised</th>\n",
       "      <th>root_shell</th>\n",
       "      <th>su_attempted</th>\n",
       "      <th>num_root</th>\n",
       "      <th>num_file_creations</th>\n",
       "      <th>num_shells</th>\n",
       "      <th>num_access_files</th>\n",
       "      <th>num_outbound_cmds</th>\n",
       "      <th>is_host_login</th>\n",
       "      <th>is_guest_login</th>\n",
       "      <th>count</th>\n",
       "      <th>srv_count</th>\n",
       "      <th>serror_rate</th>\n",
       "      <th>srv_serror_rate</th>\n",
       "      <th>rerror_rate</th>\n",
       "      <th>srv_rerror_rate</th>\n",
       "      <th>same_srv_rate</th>\n",
       "      <th>diff_srv_rate</th>\n",
       "      <th>srv_diff_host_rate</th>\n",
       "      <th>dst_host_count</th>\n",
       "      <th>dst_host_srv_count</th>\n",
       "      <th>dst_host_same_srv_rate</th>\n",
       "      <th>dst_host_diff_srv_rate</th>\n",
       "      <th>dst_host_same_src_port_rate</th>\n",
       "      <th>dst_host_srv_diff_host_rate</th>\n",
       "      <th>dst_host_serror_rate</th>\n",
       "      <th>dst_host_srv_serror_rate</th>\n",
       "      <th>dst_host_rerror_rate</th>\n",
       "      <th>dst_host_srv_rerror_rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>215</td>\n",
       "      <td>45076</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0</td>\n",
       "      <td>162</td>\n",
       "      <td>4528</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.00</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>236</td>\n",
       "      <td>1228</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.50</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0</td>\n",
       "      <td>233</td>\n",
       "      <td>2032</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.33</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>239</td>\n",
       "      <td>486</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.25</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   duration  src_bytes  dst_bytes  land  wrong_fragment  urgent  hot  \\\n",
       "0         0        215      45076     0               0       0    0   \n",
       "1         0        162       4528     0               0       0    0   \n",
       "2         0        236       1228     0               0       0    0   \n",
       "3         0        233       2032     0               0       0    0   \n",
       "4         0        239        486     0               0       0    0   \n",
       "\n",
       "   num_failed_logins  logged_in  num_compromised  root_shell  su_attempted  \\\n",
       "0                  0          1                0           0             0   \n",
       "1                  0          1                0           0             0   \n",
       "2                  0          1                0           0             0   \n",
       "3                  0          1                0           0             0   \n",
       "4                  0          1                0           0             0   \n",
       "\n",
       "   num_root  num_file_creations  num_shells  num_access_files  \\\n",
       "0         0                   0           0                 0   \n",
       "1         0                   0           0                 0   \n",
       "2         0                   0           0                 0   \n",
       "3         0                   0           0                 0   \n",
       "4         0                   0           0                 0   \n",
       "\n",
       "   num_outbound_cmds  is_host_login  is_guest_login  count  srv_count  \\\n",
       "0                  0              0               0      1          1   \n",
       "1                  0              0               0      2          2   \n",
       "2                  0              0               0      1          1   \n",
       "3                  0              0               0      2          2   \n",
       "4                  0              0               0      3          3   \n",
       "\n",
       "   serror_rate  srv_serror_rate  rerror_rate  srv_rerror_rate  same_srv_rate  \\\n",
       "0          0.0              0.0          0.0              0.0            1.0   \n",
       "1          0.0              0.0          0.0              0.0            1.0   \n",
       "2          0.0              0.0          0.0              0.0            1.0   \n",
       "3          0.0              0.0          0.0              0.0            1.0   \n",
       "4          0.0              0.0          0.0              0.0            1.0   \n",
       "\n",
       "   diff_srv_rate  srv_diff_host_rate  dst_host_count  dst_host_srv_count  \\\n",
       "0            0.0                 0.0               0                   0   \n",
       "1            0.0                 0.0               1                   1   \n",
       "2            0.0                 0.0               2                   2   \n",
       "3            0.0                 0.0               3                   3   \n",
       "4            0.0                 0.0               4                   4   \n",
       "\n",
       "   dst_host_same_srv_rate  dst_host_diff_srv_rate  \\\n",
       "0                     0.0                     0.0   \n",
       "1                     1.0                     0.0   \n",
       "2                     1.0                     0.0   \n",
       "3                     1.0                     0.0   \n",
       "4                     1.0                     0.0   \n",
       "\n",
       "   dst_host_same_src_port_rate  dst_host_srv_diff_host_rate  \\\n",
       "0                         0.00                          0.0   \n",
       "1                         1.00                          0.0   \n",
       "2                         0.50                          0.0   \n",
       "3                         0.33                          0.0   \n",
       "4                         0.25                          0.0   \n",
       "\n",
       "   dst_host_serror_rate  dst_host_srv_serror_rate  dst_host_rerror_rate  \\\n",
       "0                   0.0                       0.0                   0.0   \n",
       "1                   0.0                       0.0                   0.0   \n",
       "2                   0.0                       0.0                   0.0   \n",
       "3                   0.0                       0.0                   0.0   \n",
       "4                   0.0                       0.0                   0.0   \n",
       "\n",
       "   dst_host_srv_rerror_rate  \n",
       "0                       0.0  \n",
       "1                       0.0  \n",
       "2                       0.0  \n",
       "3                       0.0  \n",
       "4                       0.0  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only the float64/int64 columns of df.\n",
    "df_num = df.select_dtypes(include=[np.float64, np.int64])\n",
    "# Preview the first rows of the numeric-only frame.\n",
    "df_num.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standardize every numeric column (StandardScaler defaults: zero mean, unit variance).\n",
    "X = preprocessing.StandardScaler().fit_transform(df_num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 235,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 28.8 s, sys: 8.69 s, total: 37.5 s\n",
      "Wall time: 28.4 s\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAEKCAYAAAASByJ7AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzs3Xd4FNUawOHf2ZJNNhVCQiihN6VDAAFBUBAUqSIigqggFqxgb1iwo6JevcpVQaVXQQQREBBUlC4l9BoIEEIgdfu5f0zYZNlNI2UDnPc+eS47OzvzLYb5Zk75jpBSoiiKoig6fwegKIqilA8qISiKoiiASgiKoihKNpUQFEVRFEAlBEVRFCWbSgiKoigKoBKCoiiKkk0lBEVRFAVQCUFRFEXJZvB3AEVRqVIlWatWLX+HoSiKclnZtGnTGSllVEH7XVYJoVatWmzcuNHfYSiKolxWhBBHCrOfajJSFEVRAJUQFEVRlGwqISiKoiiASgiKoihKtlJPCEKIb4UQp4UQO3Jt+0AIsVsI8a8QYoEQIqK041AURVHyVxZPCFOAnhdtWw40kVI2A/YCL5RBHIqilKSjR2H+fPjzT1ALbV0RSn3YqZTydyFErYu2/Zrr5XpgYGnHoShKCVq6FAYOBIMBnE7o2xemTgUh/B2ZUgzloQ/hfmCpv4NQFKWQpIS77oLMTEhNhYwMWLgQfv214M8q5ZpfE4IQ4iXAAUzLZ59RQoiNQoiNSUlJZRecoii+2WxaIshNSq0JSbms+S0hCCGGA7cBd0uZdwOklHKSlDJOShkXFVXgzGtFUUqbyQR163o3D7Vu7Z94lBLjl4QghOgJPAf0kVJm+iMGRVGK4eefoXp1CAyEgAD46CNo1crfUSnFVOqdykKIGUAXoJIQIgEYhzaqyAQsF9pdxnop5UOlHYuiKCWkQQM4fBiSkiAiQntqUC57ZTHK6C4fm78p7fMqilLKdDqoXNnfUSglqDyMMlIURVHKAZUQFEVRFEAlBEVRFCWbSgiKoigKoBKCoiiKkk0lBEVRFAVQCUFRFEXJphKCoiiKAqiEoCiKomRTCUFRFEUBVEJQFEVRsqmEoCiKogAqISiKoijZVEJQFEVRAJUQFEVRlGwqISiKoiiASgiKoihKNpUQFEVRFKAMltBUlMuV0+Vk6r9TOXTuEHFV47itwW3+DklRSlWpJwQhxLfAbcBpKWWT7G0VgVlALeAwMEhKmVLasShKYbmki9um38bao2vJtGdiNpp5vN3jvH3T2/4OTVFKTVk0GU0Bel607XlgpZSyPrAy+7WilBvrE9az7tg6MuwZSCQZ9gwm/DmBVGuqv0NTlFJT6glBSvk7cPaizX2B77L//B3Qr7TjUJSiOGc5h054/vMw6AwqIShXNH91KleWUiYCZP9/dF47CiFGCSE2CiE2JiUllVmAytWtbbW2Hq/1Qk+1sGpUDa3qp4gUpfSV+1FGUspJUso4KWVcVFSUv8NRrhKVzJVYNXwVjSIbEWwMpl31dqwavsrrqUFRriT+GmV0SghRRUqZKISoApz2UxyKkqdWVVoR/2i8v8NQlDLjr9udRcDw7D8PBxb6KQ5FURQlW6knBCHEDOAvoKEQIkEIMQJ4F+guhNgHdM9+rSiKovhRqTcZSSnvyuOtm0r73IpSVBaHhb+O/QVA+9j2BBoC/RyRopQdNVNZKbcybBm8uPJFNiZupGl0U97t9i4RgREA2Jw2kjOTiQ6ORq/Tl8j5kjOTue6b6ziVfgqAyiGVWT9iPZHmyBI5vqKUdyohKOWSS7ro/kN3tiRuweK0sOnEJtYdXceWB7fw096fGDp/KBJJsDGYpXcvpU21NsU+5/MrnufIuSPYXXYArOesPL/ief7X53/FPraiXA7UGDqlXDpw9gDbTm3D4rQAYHVaOXL+CL/s/4VhC4aR5cjC4rCQnJVM26/bEvxWMEPmDSHD
lnHJ54w/E+9OBgA2l434M2qUkXL1UE8ISrkkhPC5/cDZAxh1Rq/tmY5MZuyYwZnMM9SuUJsTaSfo06API1uNzPNYadY0Xlv9GttPb+e66tfRtlpbNiVuwuLQklCQIYgOsR1K7kspSjmnEoJSLtWpUIeY4BgOnjsIgE7oqBVei841O7sv2L4sP7gcgzDgkA5WHVrF4XOHeeumt7z2szvtXD/5evac2YPVaWXd0XV0iO1Ax+odWXdsHQAdYzvyRtc3SucLKko5pBKCUi4tiF9AYnqi+7VO6Bh47UBWH1mNUzrz/axDOgDIsGfw8fqPGX/jeK+nhE2JmziYchCr0wpAliOLP479Qfwj8QQYAgCoElIlz6cLRbkSqYSglEvTt08ny5Hlfu1wOZi7ay57kvfgcDny/Jxe6D0ShtVhZdGeRfRt1NdjP6fLicDzYi8QSKSqV6RctVSnslIumY1mr20u6cp3iGmPOj0IDgj2/Awu7p5/N88tf85je1zVOKKDo939ESa9iWaVm1E9rDprDq9h6b6lpGSpJTqUq4tKCEq5VMlcyWubw+WgSkgV77LUwkD1sOrMGTSHv0b8RZuqbdDl+tW+0HSUu3S1yWDirxF/cUfjO2gZ05LhLYaz+K7FdJ7Smdtm3MbgeYOp/1l99pzZU3pfUik/qlcHIbSfV1/1dzR+oxKCUi5lOjK9tiVlJjG8+XCizFEIBCa9iSB9EACn0k7RdUpXggxBvNT5JUJMIR6fNegMpFnTPLZFBUcxbcA0Nj+4ma9u+4of/v2BbSe3kW5LJ9WaSnJWMnH/i2Pi+olIKUvvyyr+FRQEx4/nvH7zTXj76lwZT1xOv+hxcXFy48aN/g5DuUQu6Sp0+eh5u+Zxz4/3kGnPSQw6dMjs/+XnQl9A7s/ViKjBpz0/5enlT5NqTcVsMGN1WmkQ2YBv+nxD7Qq1eeinh/hq81dexzMbzTzd/mle7/p6Ib+pclnxNXBApwNn/oMXLidCiE1SyriC9lNPCEqpm7VjFuHvhmN808h1X1/H6YyCq53ffu3t3Nf8Pgy6nHEPLlwFJgPAex8Bx88fp/+s/uxN3svJ9JMcPHeQ42nHWXNkDdd9cx3T/53O6iOrfR4v057J5xs+L/C8yhXkUm6U7Xaw5D0k+nKgEoJSqrae3Mp9C+8j1ZqKS7rYlLiJTpM7UeeTOlSeUJnRP4/G7rR7fEZKyUOLH2LytsnuJqHicEkXdmn3OVzVJV0kZSRx/6L72ZOcd3+BGn56lWnXrvD7Sgl33w0mk9b81LQpZHo3eV4O1LBTpVStPbLW447d4XKwN3mv+/XkrZPRCR2f3fpZzmeOruWHbT/47EcoDRLpno/gS6AhkLHtx5ZJLIof7N8P9erlvK5WDf76q/CfHz8epk/Peb1jB7RqBbt3l1yMZUQ9ISilKjo4Gr3Ie6holiOLObvmeGzbm7zXYw7CpTDpTYXa7+K5CL5IKbm76d3Fikcpx+rW1e7yL/wkJBT+s4mJ8M473tv37AGXq+RiLCMqISil6vZrb6d5THNCjCEEGYLQ450cQgI8RwStOLiiUH0F+bE5bYy5bgxmgxkdOgL0ARhEzgOxDh16oUcIUWBSMOgM/Hbot2LFo1yBzp6FOnUgK4+bl4MHyzaeEqASglKqDDoDa+5dw+R+k3n1hlcJNgV77TPw2oHuP+9L3sf8+PnFPq9EMvHviXSu2Znk55JJHJtI30Z9iQ2LpUZYDYx6I07pxCVzOqp16Ggc1djrWEIIgoxaX4ZLulh+YDnTt0/nUMqhYsepXMYmTMi/E/nxx7WRSu+9B127wj33eA5vLYdUH4JS6gw6AwOvHcjba98m0+bdL/DJ+k9oGNmQVGsqmfZMjDqjRxnqS+WSLtYcWcMrv71C4+jGDG02lFkDZ9Fzak+Oph712j8kIIRFgxcxYtEI94gjvdBTJaQKtzW4DafLSe8ZvVl7dC0CgVM6WXDnAm6ue3OxY1UuQ0lJ+b+/dCk89JDW
v5CZCXo9LFsG8fFQsWLZxFhEfk0IQoingJGABLYD90kpL+9xW4oXp8vJtlPb2HNmj7vwXG5Wp5VHlzzqHgVUknNjshxZfLHxC0x6E3qdnuuqX8e1Udey9uhar47kNFsa9T6r59FcJYRgbPuxmI1m5u2ax+9HfifDnrPmwtD5Qzn9TMHDaP3mzz/hP/+BJk3gxRf9Hc2V5Z574Ouv898n9/tOp5YYliyBoUNLN7ZL5LcmIyFENeBxIE5K2QTQA4P9FY9SOiwOC12mdKHz5M7M3TXXZ3u9RJLpyMTqtGJ1Wovdf3Axl3SR5cgi3ZbO+oT1dIjtQMNKDb1i8TXpzeFyuJuwjqcd9yqsl5yVXH5nMT/4IHTsCDNmwEsvacMiHXkXBlSKqFMneP993xPb8nKh47qc8ncfggEIEkIYADNwws/xKCXE4rAw9d+pDJoziA0nNpBhzyDTkYlAEKjXFq436oyY9CavBW/yq2aanwBdAAceO0BYQBgC4VHP6AKXS2tG+ujmjxjRckShRiMdOqf1FbSr1s6juJ5e6GleuXn5naMwaZLna5sNevTwTyxXqmeegeXL898nUPt9R6/X5incemvpx3WJ/JYQpJTHgQnAUSAROC+l/NVf8SglJ9OeSdykOB5a/BBL9i3xaJpx4aJqaFXOP3+e7Q9v57fhv2HU5yQEo87oNeqosLrW7krLr1qSaktFInHhPewv05HJ99u+p9+sfhxLPca4G8ZRO6J2viONzmadBaBd9XZ8ePOH2oglnYH6kfVZOHjhJcVaLL//rjUD/fJL3nebeZVdiC+jJUGl1NrYz5wpm/P5U+vW+b/vdEKzZjBgAGzcCJGRZRPXJfBnk1EFoC9QG6gKBAshvBrWhBCjhBAbhRAbkwrqxFHKhclbJnMw5SAZ9gyfs4NNBhNhpjAaVmpIh9gOTO47mQqBFTDoDFxf43re7PomQYaizVAOMYbw26HfSLWlFrhvhj2DdFs6646uIyYkhhRLSr7NVOGB4e4/PxT3EBkvZpD8bDLxo+OJDY8tUpzFMncuVK2qjVgZMwYGDoQHHvC9rz6PuR+tWpVefHv3wqefwhdfQLduWgXRatWgd2/t6eRKFRGhdR7nxW7Xmu5mz4aaNcsurkvgz07lbsAhKWUSgBBiPtABmJp7JynlJGASaMXtyjpIpehOZ5zOd5nLdFu6x+tBjQcxqPEg92spJSaDiXfXvcvpjNNIKbE4PY+Xu4BdsDEYp8tZ5JFJVqeVrSe34vIxgejCMpxmg5mPe3zs+Z7OQJgprEjnKrZFi7ROzAtj3l0u7UIzYwY89RQ09h4uy7PPam3cFwQHw+LFJRvX33/Dk09qk7lOncopCpe7r2LlSq2C6Jtvluy5y5P//hfWroWdO32/f5n03fizD+EocJ0Qwiy0RtibgDJ6nlVKU9faXd3j9n0JDQjN8729yXv55/g/DG02lMNPHibzpUzubHJnvufLsGd4JYzCCNAH0LFGR6/RRia9iafaP8XzHZ9n5fCV9GnYp8jHLnGffeZ7ApTRCKfzGOX03ntw4IDWzj19OqSn+97vUu3dCzfdBOvXawnBbger1fvil5UF69aV7LnLo2+/zfu9kSPLLo5i8NsTgpTybyHEXGAz4AC2kP0koFzeutTqwsQeExmzbAyZ9kx0Oh1SSpzSSZAhiPax7Xlt9Wv0qt+LNtXaANpTwb0L72XOzjkY9UYCdAGsvnc1+87uw6jXOp8vXLgvdARffCG/sHzmhf4AndARFRzFOcs5LA4LBmFAItHr9EgpefWGV7nj2jv4evPXrDi4AtCSxFPXPcU73XyUI/AnUx6d31Jq7dN5qVPH8ymhJC1cWLimoIAAaNCgdGIoT9q2hZAQ78Sr00FcgZWnywW1HoJSqqSUHEs9xpcbvyQ5M5kFuxeQak3F5rQRZAxi+oDp9G3Ul7m75nLvj/d6jPEPN4XjcDmwOW0ItNnCIQEh9Kjbgxk7ZnjUOwrQB9Crfi8OpRyi
Q2wHPrj5A/cynHN2zuHnfT9TJaQKY9uPRSIJNYUSaAjkw78+5NVVr7rXXTDpTay8ZyUda3Qs27+ogqxbp40Qyl1FMzISfvoJ2rf3T0yffALPPac9FVxMCG10jcEA0dHwzz/ldjJWiXrrLXj5Zc9tQmhPSXkl9TJQ2PUQ1ExlpVQJIagRXoO3b3qbCX9OINWa6r6zz7Rn8sQvT9C3UV/2nNnjVdDuvPW858EcsO+xfUQFR2HUG5n671SEELhcLj68+UMeauO7Y++OxndwR+M7fL731cavPBbhsTqtfLftu/KXEK6/Hlas0DptpYTRo7Vx8JfKbteam4pjyBDtAnj2rNZvoNNpF7/gYBg3TisDLYQW+4Whl1c6X0mvnM89yE0lBKXMnLOcw+b0bGJIs2nLWl4bdS1BhiCPJ4SLuaSLUxmniAqO4svbvmRI0yEcSjlEi5gWNI9pfkkxBegDPF4LBCaD/+7k8tW+feGeBuLjtSGfX3yhjWwp6sXouutgzhxtlFB+oqJg61atSSopCfr310Y+Xc2iorS5Brn7ey4kysuAvyemKVeRXvV7eQwnDTQE0qeB1mHbt2FfWsS0yPfzDpeDz//JWbmsc83ODG8x/JKTAcAbXd9wxyQQBAcE82ibRy/5eKXuQvkDX6TUhj82aQI33ACzZl3anen69dClS+FGxlStChMnwrRpKhkA3Hyz9pRgyL7XNpu10WF+bC4qCpUQlDLTPrY9U/pNoVpoNSICI7iz8Z080uYR6n5SF8ObBv489meBx1i4p+gTwWZsn0G9T+tR4+MavLHmDVwyZ5jpgGsGsHDwQoY0GcLIViPZ8MAGGlZqWORzlImJE7ULTFiY1kmZe3RRfDyEh8NXX5VMHf7ERDjqXQBQKUBYGGzaBMOHayOwXn214HpH5YjqVFb8JtOeSY2Pa5CclVzozzSMbMjuRwu/EtWy/csYMHuAu5/AbDTzUqeXeLHTZVbobfVq6NUr5+nAYNDa5letgp9/httuK9nzGY1aqeaoqJI9ruIXhe1UVk8Iit/sObOnSJPJzAYzE3tOLNI5pm2f5tFpnGnP5Id/fyjSMcqFP//0HM3jcGgjdwDuv7/kz/fAAyoZXIVUp7LiN5XMlbA58h/HrkPHsObDqBlekz4N+9C6agF1Yy4SGhCKDp1HXaNgo/ciPeVe1araSJ2MXJ3uFy7YKSkld57wcPjf/1R/wFVKPSEofhMbHsvotqMJNgZ7VTy9oN81/ZjSbwqvd329yMkAYEz7MYSYQtAJ7VfdbDDzbrd3ixW3X9x9N7RooU18Cg3VhnZ+9532XnHnITRpAtu2aR3Q587BHXdcNqNilJKl+hCUYrM77Uz8eyKbTmyieeXmjGk/pkhDN+fsnMPQBUO9hqT2qNODpUOXFru89KGUQ3y16SssDgtDmg6hbbW2xTqe3zgc2opb585pxdJq1dK2JyVp/QsbNhR8DIMBunfXngKqVSvVcJXyo7B9CCohKMUipeTWabey5sgashxZBBmCaFe9HSvvWem+Ky/Ib4d+Y8CsAR4T0cxGM5tGbaJRpUalFXr5dPIk7NihNRFde23RPpt7cpii5KI6lZUysf/sfn4/+rt7lnGWI4sNxzew83QeVR99iA6O9pql7HQ5iQ6OLtFYy73ly6FePa39Pi5Oq2JaFHq9SgZKsaiEoBSL1Wn1ehLQCZ1X809ebE4b9y+836MEtUlv4v3u71Mx6CqofXOBlFoiyMiA8+e1ma6TJmmjixSljKiEoBRLo0qNqBZazd0pbNAZqGSuRJPoJoX6/Kwds9iVtAuHzJkVazaaebzd46USb7mVluY9A1mn08pXK0oZUQlBKRaDzsDa+9bSu2Fv6laoS6/6vfhzxJ+F7lROykzymotw8QI6V4XQUO+lFV0urUCcopQRNQ9BKbao4CjmDZp3SZ/tXLMzepGz3KNRZ6RDbIeSCu3yIQQsWaLVwrFYtBFF77yjDTVVlDKinhAUv4qrGsf/+vyP
MFMYeqGnXfV2zB00199h+UerVnDihFZB9NQpePwqazZT/E4NO1XKDSllseccKIriTQ07VS47Khkoin+phKAoiqIAfk4IQogIIcRcIcRuIUS8EMJPi8MqiqIo/h5l9Anwi5RyoBAiADD7OR5FKZQLfW+qmUu5kvjtCUEIEQZ0Br4BkFLapJTn/BWPohSGw+VgxMIRmMabCHoriGeXP8vlNDBDUfLjzyajOkASMFkIsUUI8bUQ4jIsVK9cTV5b/Rozd8zE7rJjdVr5fMPnfLnpS3+HpSglwp8JwQC0Av4rpWwJZADPX7yTEGKUEGKjEGJjUlJSWceoKB5+3vszmQ7PFdh+3vuzHyNSlJLjz4SQACRIKf/Ofj0XLUF4kFJOklLGSSnjotSSfoqfxYTGIMjpNzAIA9VC1boCypXBbwlBSnkSOCaEaJi96SZgl7/iUZTC+LjHx4SZwjAbzZiNZioGVWRcl3H+DktRSoS/Rxk9BkzLHmF0ELjPz/EoSr4aVWrErtG7WLx3MXqhp/81/fMs0+2SWknvwi4UpCj+5teEIKXcChQ4nVpRypOqoVUZ1XpUnu87XA4eWvwQ3237DoHgwdYP8sktn6jEoJR76jdUUUrYW7+/xYwdM3C4HNhddr7d+i0T10/0d1iKUiCVEBSlhC3Zt4RM+0UjkfapkUhK+acSglLq/jr2F8N/HM6IhSPYkrjF3+GUuqphVT2ah/RCT/Ww6n6MSFEKx9+dysoVbvXh1fSa1ss9dn/mzpmsuXcNcVWv3K6jCd0nsObwGqxOKwKB2WjmrRvf8ndYilIglRCUUvX6mte9JnK9u+7dK3oRnLoV67L70d38vPdnhBD0btCbSHNkwR9UFD9TCUEpVRaHxWtbliPLD5GUrejgaO5rqUZRK5cX1YeglKqH4x7GbMwpYms2mnmo9UN+jEhRlLyohKCUuBUHV9DgswZEfRDFykMr+fDmD2kS1YRmlZvxbZ9v6d2wt79DVBTFB9VkpJSoHad30HdGX3e/weyds7E4LGx/ZLufI1MUpSDqCUEpUcv2L8PusrtfWxwWftrzkx8jUhSlsFRCUEpUSEAIBp3ng2eQMchP0SiKUhSXnBCEEGoIheJlSNMhRAdHY9KbAK0T+YPuHxTrmE6XsyRCUxSlAMXpQ3gdmFxSgShXhlBTKNse2sakTZNIykyiZ72e3Fj7xks61vqE9QyYNYCT6SepEV6DhYMX0jymeQlHrCjKBSK/9WCFEP/m9RbQQEppKpWo8hAXFyc3btxYlqdU/CQlK4Van9Qi1Zrq3hYZFMmxp46pJihFKSIhxCYpZYHlAQp6QqgM9ABSLj4+8OclxqYoBdqZtNNjZTIAm9PG/rP7aVq5qZ+iUpQrW0EJYTEQkr1ugQchxOpSiUhR0Gb62pw2j202p42oYLWMqqKUlnw7laWUI6SU6/J4b0jphKQo0CCyASNbjSTYGEyQIQiz0cxzHZ8jJiTG36EpyhVLTUxTyq1Pb/mUvg37sjd5L02im9CpZid/h6QoVzS/JwQhhB7YCByXUt7m73j8bv16WLMGLBbQ6yEiAoYNg/DwSzteaio89RSsWAGRkfDOO9CjR8nGfJGzWWdJSE2gVkQtwkxhxTrWTXVu4qY6N5VQZIqi5MfvCQF4AogHinfluBJMmQKjR4PVCs7ssfcmE7z/PmzbBhUqFO14Fgu0aAGHDmmvjx6FXr1g7lzo169EQ79g8pbJPLLkEYw6Iy7pYt6gefSoV7oJSFGUkuHXmcpCiOpAL+Brf8ZRbjz2GGRm5iQD0JLDqVPwv/8V/XirV0NCguc2pxNeeaVYYeblyLkjjF4yGovDQpotjQx7BgPnDPRYTlJRlPLL36UrJgLPAi4/x+F/LpeWDHyx2SA5uejHtNuLtr2Y9ibvJUAf4LlRQkJqgu8PKIpSrvgtIQghbgNOSyk3FbDfKCHERiHExqSkpDKKzg90OujYEYxG7/eCguDWW4t+zE6dIDjY+zyPPXZp
MRagbsW6XkNFXbioGlq1VM6nKErJ8ucTQkegjxDiMDATuFEIMfXinaSUk6SUcVLKuKioK3wM+oIF0Lmz1m8QGKj9REfDl1/CDTcU/XgREbBlC8TFaceqUAE++AAeeaTkYwfqVKjDe93eI8gQRLgpHLPRzNT+UwkJCCmV8ymKUrLyLV1RZkEI0QV4uqBRRqp0xeXh2PljHDl/hPoV61M5pLK/w1GUq15Jla5QlCKLDY8lNjzW32EoilJE5SIhSClXA6v9HMZVz+lyotfp/R2Goih+4u9RRko5sPbIWipPqIzxTSP1Pq3HrqRd/g5JURQ/UAnhKnc64zS3Tr+V0xmnkUgOpBzgxu9uxO4snaGpiqKUXyohXOW2ndyGXng2E6Xb0jl6/qifIlIUBacT5syBDz+EdT7ri5aKctGHoPhPdHA0dpfn04DdZSfSHOmniLxl2jNJTEukWlg1Ag2B/g5HUUqXywW9e8Pvv2uTUg0GePttePLJUj+1ekIoL1wuz5IV+dm0SatHtH37JZ3qdMZppm+fztxdc6lXsR53NbnLo8z0611eJyIw4pKOXdIWxC8g6oMomn/ZnKgPolh5cKW/Q1KU0rV6tfaTkaFVFcjKgrFjteRQytQTgj+dOgWNGsG5c57bt26F5j7WDj50SJvNnJjouT00VJuRbLFApUrw1Vdwo+91jHef2U37b9rjcDkAiDJHsfGBjQxqPIgDZw/QPKY5HWI7lMS3K7aT6ScZumCoRy2k/rP6c2LsCTXZLZdNJzYxbvU4Uq2p3NfiPu5tcS9CiII/qJRPp05pSSA3lwsmToRnny3VU6uE4C9WK9SqpV3EL9aiBVw8YTAzE9q2hTNnvPdPS9N+QEsuvXtrTxGNGnntOvrn0Zy3nEeiHd/mtPHuH+/yfvf3oW4xv1MJ25u8F6POu5TH4XOHaRLdxA8RlT87T+/khik3kGHPAGBT4ibSbGk83u7xIh/L5rR516JSyp4hj8vyvHmlnhBUk5G/bN7sOxlcYLV6vt62TXuELAyXC5Yv9/lWQlqCOxmAdhE4fO5w4Y5bxmqG18Tq9Px7sLvsVAut5qeIyp8pW6e4kwFo/S0f/fVRkY6xN3kv9T+tT+D4QCq8V4FfD/xa0mEqRZFXmft8aCmKAAAgAElEQVTI0u/XUwnBX/QFTAALuOhOLTjY+6khv2OH+V5e4sZaN3p0zJqNZrrX7V6445axmhE1eevGt9y1kYIMQXxx6xdUCCriuhBXMgGCS28eckkXN313EwdSDiCRnLOco/+s/hw7f6wEg1SKpH17CPHRJPrqq6V+apUQ/KVVq7wzfvfucHEbcNOm0LWr9/aLBQZC1aowcKDPtz/q8RHdandDL/QYdAYeaPUAI1uOvIQvUDbGtB/Dtoe2MXPgTHY+spP7Wt53ScfZcXoHjT9vTPDbwbT6qhX7z+4v4Uj9474W92E2mt2vzUYzz3R8ptCfP5V+ijNZZzyeGg06A5sS8y1CrJSm4GDYvRtic5V/0emgTx8o5YrP5aK4XWFdccXt0tK0i//ff2uvhYBnnoH33vO9v8OhLZQzdy7s368tq9m0KVSpktOpHBMDI0dqHc35sDqs6HVaUrjSpVpTqf1Jbc5mnQVAJ3TEhMRw6IlDV0Sb+ZbELby+5nVSranc2+Je7ml+T6E/a3FYCH833KNsebAxmBX3rOC66teVRrhKYVWsCCkpOa8NBpgwAZ54osiHKmxxO5UQlHLvZPpJ5u6ai9PlpF+jftSMqFmkz687uo5e03uRak11bwsJCOHvkX9zbdS1JR3uZee/G/7L078+7W5+6teoHz/0/0GNVPK3kBDvfsPrr4e1a4t8KFXtVLkiHDl3hJZftSTdlo5Lunjpt5f4e+TfNI5uXOhjVAis4B5me4HdaS83cy387eE2D9O2Wls2J26mZkRNutfprpJBedCuHfz2m+e2v/7S5isV1Ad5iVQfguLlwNkDfLHhC6ZsnUK6Ld2vsbyw8gVSLCnY
XXac0kmGPYORi/Lv87A4LNz7471UeK8CsR/HsvvMbvo27EuwMRid0BFsDGZkq5FqJbdcWldtzQOtH+DmujerZFBe3H237xUUXaW34rB6QlA8rD2ylu4/dMcpnRh0Bt5Y8wZbHtxCeGC4X+LZcHyD17Ztp7bl+5mHFj/ErJ2zsDgsnLOcY9iCYay8ZyX9GvVjb/JemkY3pU/DPqUVsqKUjF69tL7B8+e1EYZBQXDLLb6TRAlRTwhXopQU2LED0nPu7idvnUzsx7HETIjhhRUv4HR5l8lwuBz0nNYTq9OKw+XA4rBw7Pwx/vPPf8oyekArr/H8iudJs6V5vXdhsprT5WTtkbUs3bfU3WEMsGjPIiyOnDkeFoeFJfuWMKjxIF7u/DJ9G/VVd8FK+Ve5Mqxfrw08ufZaePBBmD69VE+pnhD8Ze9eWLRIG2FkMmklKUaNKlLb4Jcbv+SFFS9gcVro07APU/pOIWjaLHj4YRwBBhbUc3DqyZHIBg14fuXz7hIQn/7zKYGGQMZ1GedxvCX7lpBl95wy75AOTqSduOSvmW5LRyAIDggu9GeSM5Np9t9mJGcle7X9g9b+P3vHbD7b8BlbT25FJ3TohZ61962lcXRjQgJCSLHkjM4I0AeouQvK5alhQ1i2rMxOpxKCP/zxB/TooZWjuDDKa/58WLUKZs8u1CGW7V/G2F/Hui/yi/Ys4tHZw/nmkcU4bBa63gVbYsC55z/YD+lxypwngkx7JjN2zPBKCGezzqIXehzS8yLcrU63In9Fq8PK4HmDWbx3MQC3X3M7UwdMLdQw1+nbp3Peet5nMgDIcmYxdMFQdELnMZO5+w/dOfbUMT695VPunnc3WY4sAvQBRAVHcX/L+4v8HcqlU6fg3nthyxaoXRu++w7q1tWGIRuN2jb19KNcIpUQ/GH0aO/hZFlZ8NNPcOyY54SUPCzZv8Sj6JvFYeHnIysgIIAfa2exNQYyTNlvSu/moVCT9zyF62tcj1FvxOHIuRBXD61O/2v6F+575TJu9TiW7V/mvqj/tPcn3ln3Dq90fqXAz2Y5snw2aeV2cclugMT0REYvGc2Xt33Jb8N/Y8m+JYQHhnN/y/svfUSR1aq14VaqpE0O8ieXS5ucuH+/VgXz9GltVmtMDBw5or3fqZP2e3TxTHdFKQS//YYLIWKFEKuEEPFCiJ1CiKLPtrhcJSf73q7Xe1c5zEO0OZoAnec/+opBFcFmI8kMTh83iXqhRyAwG8180P0Dr/frVazHwsELqRpalQBdAHFV4mhdtTU3fncjjyx+hO+3fs+4VeOI/iCaiu9V5Lnlz+GSOSMepJScyTxDui2dVYdWkeXI+S6Z9kxWHVqV5/eZ+u9U+s7oy4hFI2gV0wqjPqfj7EJZ7sKYsnUKAO2qt+P1rq8zpv2YS08GkyZpk/9q1NAKEe7de2nHKSlHj2oXfnt2MpQSUlO1uDIytN+dtWu1yUuKcgn8+YTgAMZKKTcLIUKBTUKI5VLKK39B3169tEf93MXt9HqoXh3q1HFvSkxLZNiCYWw7tY3YsFgCDYH8e+pfKgVVYlTrUVQ0VyTVkorD5UAIwd2t7+PMmy6u/2Q8QuTMPDXoDDSJbkL/Rv2xOCwMajyIFjEtfIbWvW53jo85zrHzx2jy3yZsPrkZl3Sx6vAqJm2e5NH09Nk/n2HSm3jjxjeIT4qnzf/auAutxQTHoEePE21/vdBTt4LvcqoT/pzAuNXjyLRnohM65sfP58ObP+STvz8h055Jv0b9aF+9PfcvvB+rw4qLvIfd6UQJ3eNs3QpPPZVTZDAhAW69Vbs795fgYG22em5Op2eNq6ws+Oefso1LuWL4LSFIKROBxOw/pwkh4oFqwJWfECZO1MpWzJ2r/YMOCYHOneHrr92lb50uJ12+68LBswdxSAdnMnPKXmfYM3hp1UuAdvdsd9mRSF5e9TIvA7oRggqOAJw6ic3loHWV1iwcvJDKIZULDG3D8Q28uPJFtp3a5p4M
doHzoqanLEcWb659k6nbp5KcmexRdfNkxknt4px9rXJJFzXCa/g857vr3nU3f7mki/OW8zy29DF0QodAcDr9NN3rdGfV8FX8sv8XPlr/kces4wvMRjOPtX2swO9YKJs2ebbFS6mtR2G1aoMASti+5H0cPX+Ua6KuyXt+RFQUDBsGM2dCRgZZRkG6URJhBeOF/zSBgdCsWYnHp1wdykUfghCiFtAS+Nu/kZSNo5bT9OywhYPXCKqG1uCnu37ymnl7KOWQOxnkJ3ezzAUuJMkGG7ggQBdAz3o9qRxSGYvDwom0E8SExHg0waRaU/liwxdsPL6R+bvnexQ6K4xD5w753O7RnIRk/NrxvNT5Ja+7+IsTjUR6dCjP3DmTvxP+ZufonbQLqE2no/DIv++yN9SKUWfEoDPQPrY9A64ZwMNxDxcp9jzV8JG8QkJKpm0+IQFOnoQGDSAsjLfWvsWba94EtL+zb/p8w7Dmw3x+9PxnE0hr04ilU15iWwUbCxrB71MgOlMQYjAjmjaFF14ofozKVcnvtYyEECHAGuAtKeV8H++PAkYB1KhRo/WRI0fKOMKS5XQ5iXg3gnR7zhyBQH0giU8nerR1P/3r03z414fFOpfRCWlvw9YqgpQfJjHorzG4pAuJZGr/qfS/pj9p1jTqfVqP05mni3WuwtAJHekvpBNkDPLY/syvz/DFxi88Oskvphd6/op5mTZPfYDToENaLPw87DoOjOhf5E5jKSV2l92jsF2GLYNHlzzKikMriAmO4cte/6X1cxPhxx+15jyHQ3uiu+WWfI/tdDn5fMPnrDu6joaRDXnu+uc8V3d780146y33U8bhaV/QcMv92FyeyyMmPJVAtbCcdR9sTht3zLmDpfuW4pIuhBDupBnggOuSTMwasoCYjjeXWlkD5fJV2FpGfh02IYQwAvOAab6SAYCUcpKUMk5KGRcVFVW2AZaCXw/86pEMACxOi9dawT/v/bnY57Lr4N3rofVxSZ07RpFmTSPDnkGmPZOhC4ZyMv0k/9343zJJBgD1KtTzSgYA73V/jxEtR+Rb199gc9L08fGQmYk+NR2DzUH379fRJjW0SMlgxcEVRL4fSeD4QGp/Upv4pHgABs8dzMydM0lITWBj4kY6TenMurcehF9/hcmTYdeuApMBwD0L7uGFlS8wZ9ccJvw5gY7fdsTuzO4E3rgR3n1Xa3ZKTYXUVKoOfdjn8NqP13/s8fq11a+x/MBydwmP3J+xGWB9dUlQ2w4qGSjF4s9RRgL4BoiXUhZtiafLmNWhdVIKF9y9DcavgLv+BSk9O0rDTL4XuClQ7gc+Ae90gmQzGO0Sfa5TWOwWqn9UnRdWll3zwrBmw+gypQs9pvbg9yO/u7frhI5udbr5TBYXRGeA66IaLjYdfDb9CQr7lHsi7QT9ZvYjxZKCRHL43GFu+v4mbA4bS/cv9ZjdnOXI4sbvb+L5zEUwYADU9K6wKqX0mMiXnJnM3Pi5ZNoz0Tvhox8t/PX4v4jwcBg/HuLjvYauGjOyQHp3kruTSLY1h9d4NQ/qhA4dWm2mse3H+q28iHLl8GcfQkdgGLBdCLE1e9uLUsolfoyp1N1Q6wYCdEamzLHTey+E2CHdCEFBP8LUOwDtQuN11+sCRPZPXnxcFwMccDoYbr0bnLmuRS5cPvcvLQadgfFrx7snkq07uo7lw5bTIbYDAN9t/S7fJqOTIeAQngEHOGF7BTup1tRCXQy3JG7xmhh33nKexPRE9Do9TqdnX4bdZeezfz6jb8O+tI9t7/He6sOrGTBrAOet54kJieHnIT9TObiyu39k3Gq4518w2wF7FrzzjrYe7kVJTYSF0blmU1YfXePephd699/LBfUi67HhxAb3/Aujzkj76u3pXrc7raq04tb6txb4/RWlIH57QpBSrpNSCillMylli+yfKzoZAFQIqsBndR+n7x4tGYD2//r5C+DgQQDWJ6xn1eGLxuwXlAzw/b5DBy/fAAlhhfh8MV0Y
FeSLS7o8ZhVn2jOZuH4i9y+8n6ofVmX+bp8thm52A/S9C1IDtJ8sPTzZE1KqRxb6aSomJMZrQpvVaeWNNW/Qrmo7THrv0UMCwZ7kPR7bkjKS6D2jNymWFFzSxYm0E3T7vhsRgRG0iGmBSW+i324Izn2qzExtXeznntNGAoWFaT+LFvHLsGW0jGnp3tUpnQyZP4Trv73e3aT1frf3iQmJITQglNCAUKqEVmHOoDm83PllAOp8UodK71fi3h/v9So/oiiFVS5GGV1N3lzzJrPXf8KJjlA5A+7bAoFOtOGmqdpQyhdXvug9E7ewF3OB+85fJ8Gmh0WFXzrgkpj0JtpXb09sWCzLDiwjOUubeCeR7pFGvhLFmiNrOGc557FaV35W14aGLwRT7YyN0xFGssLM/Hr30kIXqqsZXpPaEbXZfWY3OnQ4pROndPLt1m8B0KEjQB/gEY9Eei2is+P0DvTCs63e6rRy5PwRlg1dxmNLHiMjfA6upKycOy6DQZtR/OqrcN99OaOMwsMx4T1SyyVd/HnsTzp824Hdo3dTOaQyu0bvYuXBlZy1nKVHnR5EB0ezJXELA2cPdDcnzdo5C7vTzrTbpxXq70RRclMJoQy5pItxq8chK0p2dNG2PXorzJsF/dJCoVEjLA4Lq4+s9vygpGh39wJiUuFMMDjKoI/R6rQSExLD3Pi57gtToD6QSHMkZzLPoBM6DDqDR+XSIH0QZ7PO5lmvyBez0cyDHZ5mbPuxJGUmUT2seqGXwEyzptFqUitOpp/UEgHepTFcuIgwRWBz2XC6nNicNl64/gXaVmvrsV9MSIxXErM77USZowgzhfFd/++g9lNaGQm7XZvPEBrK+bGP8tiCe9hwYgMNIxvyRaMvqIrW1OXr7+HCovexH8cSZgojJiSG0xmnybBn4HA5uLfFvaRb0939UqCVMJm+YzpWp5WPe3xMbHjBZVAU5QK/Dzstist9Cc3tp7bT7EvvSUMBDjh0559UbdKeNYfX0OW7LsU7kQSjQ2tmKe1movzohbZmc+6mIoEgNiyWb/t+S89pPQuVEOpG1CXSHMngpoN5st2TSCR7k/ficDloVKlRoQvmjfpplMfkOV9CA0JJHJvI/rP7qRxSmZiQGJ/7PfnLk3y9+WtAu3C/1uU1nulw0eL2hw9jmTeLRQeXMq2Bjb/tB0mxpGBz2jAIA5VDKvNo20cRCLaf3s78XfPJchatuceoM/qs6wRQMbAiu0bvKtSEROXKppbQLGeklNw63XfHn8OoY2dQOlWB/SnFK40Q4IC6yRAfTU7zUSGTgnBBjfNwJKLwn8mPUzq9OmolkpPpJxkwe0Chnw7qVqzLsmFaCWCLw0KPqT3YeGIjOqGjVkQtfr/393zLW59OP82y/cs8ElNebq5zM8EBwTSPaZ7vfu91e4+utbpyJvMMzWOaE1fV+9+ao0Z1rguewe4qu7Ge8zy3Qzo4nnacV1Zpxf5MehODmw7mx90/ct563mNSX37ySgYAabY0Zu+czWPtSmj2tnLFUwmhjJzNOsvx1OO+35S4yzrsTy5GQpDw3QLovReW14Hb7wRXYS/sEqSAyAw4GlG6A5BsLhs2a+H6DQB2JeVUM3ln7TtsOL7B3TS1N3kvT/zyBN/3/x67087GExtJTE8kPimepfuW8kfCH4U+j1FnpGf9ngXu99XGr3jilycQQhBljmLFPSt87rf15FYOpBzINxFdSIoOl4P9Z/eT/Gwy32z5hld+e4WTGScLHbsvF8/4VpSCqIRQRkJNoXmWhOhYoyMNKzUEKNRdrE8SwrNg8E7tZbeDMHwrTG5JwU8Jud7fWblMR6MWSq2IWu4/b0rc5DEe3+a0sfXkVlKtqXT4pgMHUw76LOdRGHaXnSd+eYKGkQ3pVLMTyw8sZ/eZ3TSObsyNtW8EtKGrY34d4/7vlJCaQK9pvdj3+D5AWxDI4rAQGRSJlDLPORIC4fX7kJSZhBCCptFNOWc9V+T4deg8Cv8FGgLp
26hvkY+jXL1UQigjAfoATHqTzwv++C7j3X/u07CP1yzVwtLlur6E2KHxadBLz/kHXi5KFlYjfs0IwfpgMpw57fwmnYnH2j2Gw+XAoDPQIqYFKw+tdE8iM+qMNI5qTJ8ZfdiZtLPY58+yZ7HswDLm7JrDt1u+xSmd6IWeh+MeZnCTwXy/7XuPi7xEciDlAJuOb+L9P99nXvw89Do9LSq3YPGQxVQyVyLjvGe/hU7ofCaKhpHaTcEHf37gMUkOIDIoErvTTqrNu6jfBdVCq1GrQi0OnTtEvYr1+LjHx9SpUCfP/RXlYmpN5TIUbPS9jGS76u3cf+5SqwttYtpc0vGjc1XESDfC9sqelZELzY8d0RnODOYNmsf7N72vDfcUcP/C+2n8RWNOpZ/ipU4v0TKmJUGGIHTosLvszNw5kzVH1hR88EIwGbS5CF9v/poMewYWh4UMewYf/fURnSZ34ustX/ucMdzh2w7M3jUbp9RGJ209tZVHljzC7IGzvYaoGoTBq8CfXujpWU9rrvJ109AgskHeVVCznco8RbPKzTj21DFWDV+VZ4lzRcmLSghl6M4md/rcbjJ6TohaP2o9r3R6hdoRtWldpTXv3PhOoY7fIAkyjJBlgPnXwPfNtWu7Pr/Fx8rhaou3z76dZ1c+y/7k/VidVjLsGRxKOcSon0YRaAjkk56f4HA68l0Xoaj0Qk+QIYgqIVXoWqurxwI9oA1JzXJkecymDjOFYdQZ0QmdV3E6m9PG+mPraVu9LWPaj8FsNBMSEILZYOa9bu9x+zW3E2TIKdURZAyiV/1eADwS94hHNVqz0cyjbR8tcAKezWnjl/2/XPLfgaKoJqMy9EWvL1hzeA27zuR0ki66c5H7z4dSDrE+YT2VzJV4retrvHHjG+73qodVZ9iPvksiAyDgp2uhbRSkm7SOYQCnBLMNMi/Dmme5L7J2l53lB5eje6Pk72FMehNPd3ia6mHVubvp3UhkoRbaqRZajeTM5DyLA9aM0Oofvd/9fQZcM4D9Z/fTJLoJLWJa8JDjIZ7+9Wl+2f8LlUMq8/mtn7vnDNxS/xamD5jO+LXjcblcPNn+SYY0HcJfCX/x76l/sTi15iS90HtM/gOoHKyGmCqXTs1D8IN0WzpHzx3l2uicGbC/HviV/rP6u/+RXx97PYuHLEavy7mSB7wRgF3mMcwwr47jok5qu4L1b9iftUfXciYrZ7GhCFMEi+5aRKeanTz23XRiEwNmDyAhNYHKwZVJsaR4tOv76hTOLUAfwNYHt3JN1DUlFn+6LZ3uP3Tn31P/IqWkWeVmJKQmcM5yDpd0odfp+f3e32lZpWXBB1OuKoWdh6ASQjnw+5Hf6fpdV487PYGga62u/Dj4R0JNoQDc8sMt/HIwjyYBqXUi77yMbhAfaPUAU/+desmjgopi8eDF9GqoNclc3Cmc39OAlBIhBB/++SEv/vYiBp0h3yJ8FwQbg5l9x+wSLzrnki4OphxEIKhToQ6p1lR+3P0jVqeVnvV65rkqnXJ1UwnhMnEw5SDN/tvM5wxavdDTtlpb/rj/D+LPxBP3ZRxZruyL50V3/joX7PocGl08B6kcPyFUDKpISpZWirq04uxdpzcLhy70qnf0n3/+w3MrnsPqsNKtTjdm3zG7wDb6NGsaSRlJNPhPA69V3i4WZAjiw5s/5OE2JbSCm6IUw2WxQI4Cqw6tyrPpwSmdbDm5hZ2nd9LkiyY5yQA8L54SJi2EOikXHcDHRTa/RWjK2tmss0gkQTZ4eAMlPty1aVRTEjITeGrZUx4VQJcfWM6zy58l056JUzpZfmA5A2cNLPB4oaZQ6lSswz3N73F3+ury+CckhKBVlVYl80UUpYyoTuUyYnPa+Oivj9icuJkWMS0Y234sJoOpwLtSKSVvrX0r3/bqRklw3zY4UPGiN3xc+4UQhV5QpqxYjDBgJ3zdGuwl2Pm9I2kHEkn8mXjiz8SzbKhW/mLFwRUezVQuXCw/tJx9yfuoH1nf
4xjHU48z8qeRxCfF06xyMyb1nsSk3pOoHVGbXw78QmxYLLfVv41ZO2ex6vAqnNKJS7p4+6a3PYYTK8rlQCWEMiCl5Lbpt7Hu6DqyHFks3ruYXw/8ym/Df6NPwz7UCKvB7uTdXp8LNARyS71bOHz+cD4HhzYn4EwQ9PU9qtVDhCmCs5azl/5liiL3E4oEg0ubJCeF925mO4gSzlPNT0i+Wgwx6RZ+q7uCs7ckUDGyOlVCq/jcf9r2abzW5TX3a6vDSsdvO5KQmoBTOjmedpzOkzuz85GdvHLDK7xywyvufYc2H4pLukhMS6RCUAWPYaOKcrlQTUZlYN/Zffxx7A/3XWmWI4sNJzaw/dR2NpzYwDWVrsFsMLubc/RCT8XAirx6w6vMvmM2N9S4Id/j/10VKj8L8Xl1KEvczTFnLWfLrNlI5wKTXfupdQ5uOJz3RLmOD2hrN+SO1ZdoczSDGw8mUBeY/8klrJkCcSegRircud1FyD0PAFpn9sWTxXTovArKbT+9nbNZZ939BQ6Xg8T0RK8Fc9zHEDqqhVVTyUC5bKknhDJgdVi9iozZnXYGzhnIgbMH3M1BBp2B6OBoRrcZzeg2o3lyyZMY3zT6OmQOAXujCwjA645cEhIQQrot3ff+Hh8V2noGdidWARSlSUev45okye5IyeEKcDgij/3yyE/BxmDe7/Y+Rr2R2hW0SXoXqpp2mdKFP47+gUN6F2/TCR2tzgagkxb3HU+QA1i2HBwOggOCeb3L67z5+5vuWcFBxiCGNhvqcZxAQ6BX57HT5STQUEAyUpTLlBplVAaSMpKInuB91dYLvdcFRyDQC73PC11JMgiD1zmaRjflgVYPcPjcYfQ6Pb0b9ubM9r+p+MKbtNuXCQ4H9/eBWc21Wjw6oct7tI2EhpUacvTcEbKcFt/7FCDYGMzWh7ZSr2I9r/cS0xLp/kN39p/dj9PlpHF0Y05nnCbIGMTjbR/n/vhAjI88RqAl17yNgACwWCC7H2XS5klM+3ca4YHhjO863qvktZSSm6fezB9Htac7s8HMjbVvZNFdiwq9SpuilAeXxbBTIURP4BO0+86vpZTv5rf/ZZMQZs6EiRO1ZRNfeol9bevR8quWHkNLC5rYVNoCdAEeM4GNOiOnnzlNRGCu23iXS1v2MSnJvclmMjDvhxcZ0P9FRi4aydTtU30f3wFrR/5B12ndCzVu3xeT3kTSM0nueRgXk1JyJvMMwQHB3s00GRnQvDkcOwY2G5jN2iL348YVKQa7085//vkPW09tpXWV1jzS5pFCLcijKOVJuV8gRwihBz4HugMJwAYhxCIp5a78P1nOzZwJI0Zoi6oD3H47FWZO9ipYVhZPAXmpGFgRi8PiTgh6oadBZAPPZACQnOxe5/mCgIAg7pKNwWBiUONBzN89P+eCn92JHGiH9kkm4mKvo0VMCzYnbsbisLj7LiQSgSAqOAojRo5neK8TEWQI4tNbPs0zGYA2YioqOMr3m8HBsGkTfPyxlhR69IBBgwr3F5SLUW/kqfZPFflzinI58uetTltgv5TyIIAQYibQFyh3CeHMj9P5ZeEEdKHh9LrnTcLjrs9754kTc5IBQFYWG1++D8ftnhf/x9s9zkfrPyqliHOY9WZ0Oh3hpnBiw2PpWa8nr3R+hXXH1nH3/LtJykiiZUxL5t05z/vDFSqA/qJOA6cTatcGoHfD3nx080eMWz0OS8Z5IlMsBDvg+lOBfPDSanQ6HcuHLefVVa/yz/F/uDbqWkx6E9tPb6dp5aaM7zqeNYfX0G9mP481jg06AyvvWUn72PbF+/Lh4fDaa8U7hqJcRfyZEKoBx3K9TgC8Bm4LIUYBowBq1Cj7afmHBvegTc1fsWaPVAyZ2Ykth74l5o77fH/g4gsokOZjndzfDv9GhCnikhZCAehWuxs96/VkRKsR3nf2hdC5ZmeOPXUs/50MBpg+HYYMAaNRWzD+4YehTU557gfjHuTBuAe1FykpcOYM1Kyptdej
VeqccPOEPE9hMpgwB5hJs6W5t+mEjgaRDYr8nQrD7rRj0BlUH4Ci+ODPYad5lWLz3CDlJCllnJQyLiNZlTIAAA4VSURBVCoqj+aBS1GlCggBTZrAtm2+99m6lTH8SkqgVkE03QRnzPDyrAfzPu5LL0FQTlljV1AgH3fw/qoGYWBKvymYDWbMRjNB+iCP9wWCWhG1+Hvk33Sp1YUKgRUICwijZUxLNo/azPJ7ljO2w9hLSgZF0rcv7N6tJYb162FC3hd3KlSA+vXdyaAwrq9xPZXMlTDqtNFUQYYgutXpRqQ5sriReziZfpI2k9oQ+FYgIe+EMHnL5BI9vqJcCfz5hJAAxOZ6XR04URonkkLgRMt+gosy0c6d0KIFfPABPP205we3bychDFy50qZDD0fNeS9szq23wsKF8PnnYDAgxo5Fv/tZOLrOvYtO6Hij6xvcUv8WNozawB9H/yAqOIqQgBBG/TSK5Kxkbqh5A9/3/56IwAhWDV9V/L+E4oiN1X5KQZAxiL9H/s2TvzzJ4XOHuaHWDYy7oWgdv4UxYNYAtp7aiku6yLRn8ujSR2kc3Zi21dqW+LkU5XLlz4SwAagvhKgNHAcGA0NK+iQuoXVlXmjIybOh4Jln4MkntWaSC9q35+YpsDMKsrJves026Gmvmf9Ju3fXfrLPt6rdKsb8Mob5u+cTGhDKRz0+4pb6twBwbdS12spg2Q4+cbCI3/DyduTcEW78/kaOpx7HJV0MuGaAe9WykrThxAaPuSAOl4M/jv6hEoKi5OK3JiMppQN4FFgGxAOzpZTFXxQ3t+xkkPsnX+cuas+vV4/Xur7OgHjQu8DghGEHQ3jy801FCsOgM/DprZ+SMCaB+Efj3clAgX4z+3H43GGsTit2l51XV7/K2iNrS/w8FQIreLw26ozEhMSU+HkU5XJ2ZU9ME6JoVZVdLq1f4WKpqTh2/Ito3AR9eCm32V9lDG8YPCa3BegDeOemdxjTfkyJnufnvT8zaM4ghBAIIWgZ05Lfhv+m5hQoV4VyPw+h3Fm40HcyAAgLw9Ahn6GmyiWrHFKZE2k5XUdGnZGa4TWxOW2ct5ynkrlSiYwI6tWgF5sf3My6o+uINEdyW4PbVDJQlItc2f8isle78vUM5HGJycqCQFWfxh9m3D6DW6fdil6nx/n/9u48xsryiuP49wwMOyOCRDYRpCxFbUNFS0sVDZYApe5xSyuKKxRr3Teiif/gAlVj3QiKJpJigjQlDVhRGo1ECAjGDSHUIowsUkFGYAQGTv84d8oMDDMDc+d9h/v+PsnN3OW99553YO65z3ae/fsY3ns4m3Zsov3k9hhGl3ZdeOead+jTsU+D36v/Cf3pf0L/PEQtUpgKu8uoUm6WUQXQ/Ku1NOtZx6CwJOrrsq9ZumEpnVp3onXz1gx7ZRi7KmJxn2H069SPLyYeWh5cCsCGDTBjRnwpu/RSGKT9oBuDuoyqcqcZR1aoU5LTvaQ73Uu6A/DCsheq1XhynNXfrqZif4W6eApNaWnUmyorixXwTz4Jc+fC8OFpR5ZZ2g9BmpSTjzv5kL0KOrTqoGRQiJ56CrZvh4qK2Chj1y648860o8o0JQRpUkb+aCSj+46mbXFbSlqW0Ka4DbMum5V2WHKkysvh88+jQOLhbN0aLYOqvv++5mMlEfraJU2KmTHrslksWr+IzTs2M7jbYE7uoDGfY8rSpVFdtqIiSo9Pngy311Ax9oor4PXXDxSDbNMGrrwy2VilmmwMKotIMtzhxBOr7aFBmzawaFGUiDnYq6/CpEmwezdccw089liNBSKlYTSoLCLJKys7dMV/URF8+mnNCWHs2LhIk6AxBBHJn5KSatV+gagA0LdvOvHIEVFCEJH8MYM5c2LHuuOOi+Rw663w80O2OilMc+dCnz7RbTZhQnSFvfIKjBkD48bB2rVpR1grjSGISP5t2RKl5bt1g36Ns9lRk7NkCZx3XsywgkiGp58e3WW7dkXXWUlJzL7q2jXR
0Oo7hqAWgojkX+fOcO652UkGEPXQyqvsjlheHjOuKmdR7d8f12c13WnUGlQWkWwoK4sP4507YdQoGDAgv6//8cd1H+N+6NqLJkQJQUQK37ZtUSdpy5ZYHzFpEsybB8OG5e891tewR3nnzrBjx4FWQsuWcMkl+XvPPFNCEJHC9/zzsHFjLJSD+DlhQoxz5EvHjtVvm8XOiQMHwhtvQKdOsVXvKafk7z3zTAlBRJKxejU880z0rV97LfwqwT1GvvnmQDKotGZNlMpo3z4/7zF5chTmKy+PZNC2bbREBgyABx7Iz3s0Mg0qi0jjW7UKzjgDnn0WXnopSlvMmxf96U88AaNHx/TUb7+FF1+Eyy+Pfc63bavf65eWQv/+MbOnVy9YubL646NGQYsW1e+rqIBbbsnL6QEwZEjMNLrnnkgAK1bkf5yikaUy7dTMngB+C+wB/g1c5+7f1f4sTTsVOWZs3gyffBLTK089FcaPjw/6qp83gwbFtMzZs6OPvUWL2Khqzx744YeYptmuHYwYEa9x993xrbvSV1/BlCkxmPv++zGLp1JxcVROXbAgYpgyBW64IY6rqnv3SCYFrr7TTtNKCCOAhe5eYWaPAbj7vXU9TwlBpAlavx7uuw/WrYtv/meeGQOnu3dHC6CkBIYOhfnzqz+vY8eoeFofrVrFt+333oPp0+Gjj6JffufOwz+nuBj27o3um5ISuPlmePrpiKvSWWfFt/oC16RrGbn7W1VuLgYuSyMOETmMtWvjg37nzri+bl10+3TtCtdff2Bns61bYfDg6OrZtw+WLz+QCCqVlUUyKCqq/i2+vskAosWwalW0KDZvjtt12bs3frpHq6NbN+jd+0CLoKgIpk2rfwwZ0BQGlccBr6cdhIjkPPQQPP549LHXNGd++nR4881YeDZ/fnT3VB5XOb2yJlWTwdEoL49uoqNhFlVXV6yImMvLY1VxwiuGm7pGSwhm9jbQpYaHHnT3v+eOeZDY6nhmLa9zE3ATQM+ePRshUhH5v6VLYerU6t0qB9u9G+66C5Ytqz4m0NRUtkiaNYtkcNFF0fV08cVpR9ZkNVpCcPfza3vczMYCY4DhXstAhrtPA6ZBjCHkNUgRqW7VqvggrUtZWfwcPTpm9pSXJ78Ct3nzGIhu3z5aANu3x/3FxXGZOBE++CAGjh95JBaJSa1S6TIys5HAvcAwd6+ljSkiiRo4sO6uHTO4+uq43rEjfPhhTLVcvz7654+2W6e+zGLF74gR8PLLseALouWycGGMF5x99qELxaROac0yWgO0BCo3XF3s7nVOCNYsI5EEPPooPPxwJIaKiuqPFRXBjTfGeoKadjZr0eLAYG4+9esXCccsyku/+y4cf3z+36dANelpp0dLCUEkIZs2xaWkJFbzFhVBhw7R/VJbl5JZ48Szb1/MdNqzJxKCttk8Ik162qmINHFdusQlbeecE60BiBXI0qhUukJE8ueOOxr+GqWlMXvJ/UAykEQoIYhI/kydCs89V/dxxcWxonnGjBinqEwA7tEtJalQl5GI5Nf48XGRY45aCCIiAighiIhIjhKCiIgASggiIpKjhCAiIoASgoiI5BxTpSvMbAvQkMpZJwD/zVM4x5osnzvo/HX+2T7//u7evq6Djql1CO7eoPq1ZrasPvU8ClGWzx10/jp/nX99jlOXkYiIAEoIIiKSk7WEkOUdtbN87qDz1/lnW73O/5gaVBYRkcaTtRaCiIgcRqYSgpk9YWZfmNnHZvY3M+uQdkxJMLORZrbKzNaY2X1px5MkMzvJzP5lZivN7DMzuy3tmJJmZs3MbIWZ/SPtWNJgZh3MbHbub3+lmf0i7ZiSYma35/7ff2pmfzWzVrUdn6mEACwATnP3nwCrgftTjqfRmVkz4FlgFDAQuMrMBqYbVaIqgDvd/cfAEOAPGTt/gNuAlWkHkaKngTfdfQDwUzLyuzCz7sAfgcHufhrQDLiytudkKiG4+1vuXrlr+GKgR5rxJOQsYI27
f+nue4BZwIUpx5QYd9/o7stz178nPgwyswOLmfUAfgNMTzuWNJhZCXAO8BKAu+9x9+/SjSpRzYHWZtYcaANsqO3gTCWEg4wD5qcdRAK6A+ur3C4lQx+IVZlZL2AQsCTdSBL1FHAPsD/tQFJyCrAFmJHrNptuZm3TDioJ7v41MAVYB2wEtrv7W7U9p+ASgpm9nesvO/hyYZVjHiS6EmamF2lirIb7Mje1zMzaAW8Af3L3srTjSYKZjQG+cfcP044lRc2BnwHPu/sgYCeQiXE0Mzue6A3oDXQD2prZ72p7zjFVuqI+3P382h43s7HAGGC4Z2PObSlwUpXbPaij2VhozKyYSAYz3X1O2vEkaChwgZmNBloBJWb2mrvX+qFQYEqBUnevbBXOJiMJATgf+I+7bwEwsznAL4HXDveEgmsh1MbMRgL3Ahe4+66040nIUqCvmfU2sxbEoNLclGNKjJkZ0X+80t3/nHY8SXL3+929h7v3Iv7dF2YsGeDum4D1ZtY/d9dw4PMUQ0rSOmCImbXJ/R0Mp44B9YJrIdThL0BLYEH8fljs7rekG1LjcvcKM5sI/JOYZfCyu3+WclhJGgr8HvjEzD7K3feAu89LMSZJ1q3AzNwXoi+B61KOJxHuvsTMZgPLiS7yFdSxYlkrlUVEBMhYl5GIiByeEoKIiABKCCIikqOEICIigBKCiIjkKCGINFCWq8lKYdG0U5EGyFWTXQ38mlgVuxS4yt2zsvhJCohaCCINk+lqslJYlBBEGkbVZKVgKCGINIyqyUrBUEIQaZjMV5OVwqGEINIwma4mK4Ula9VORfJK1WSlkGjaqYiIAOoyEhGRHCUEEREBlBBERCRHCUFERAAlBBERyVFCEBERQAlBRERylBBERASA/wGTxrWmRquhSgAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1baf7224a8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%%time\n",
    "def display_2d(X, n_samples = 10000, random_state = None):\n",
    "    \"\"\"Project X onto two principal components and scatter-plot a random sample.\n",
    "\n",
    "    Point colors are taken from the notebook-level `labels` variable, which\n",
    "    must already be defined and have one entry per row of X: entries equal\n",
    "    to \"attack\" are drawn red, all others green.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    X : feature matrix accepted by PCA.fit_transform.\n",
    "    n_samples : maximum number of points to draw; capped at the number of rows.\n",
    "    random_state : optional seed forwarded to DataFrame.sample so the plotted\n",
    "        subset can be reproduced. The default (None) keeps the sample random.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The full 2-column array of PCA-transformed values (not just the sample).\n",
    "    \"\"\"\n",
    "    pca = decomposition.PCA(n_components=2)\n",
    "    pca_values = pca.fit_transform(X)\n",
    "    # Build the plotting frame from a copy so the returned array is left untouched.\n",
    "    X_pca = pd.DataFrame(pca_values.copy())\n",
    "    X_pca[\"color\"] = np.where(labels == \"attack\", \"red\", \"green\")\n",
    "    # DataFrame.sample raises when asked for more rows than exist, so cap the request.\n",
    "    n_plot = min(n_samples, len(X_pca))\n",
    "    X_sample = X_pca.sample(n_plot, random_state=random_state)\n",
    "    ax = X_sample.plot.scatter(0, 1, color = X_sample.color)\n",
    "    # Columns 0 and 1 are the first and second principal components.\n",
    "    ax.set_xlabel(\"PC 1\")\n",
    "    ax.set_ylabel(\"PC 2\")\n",
    "    return pca_values\n",
    "\n",
    "X_pca = display_2d(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 237,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.9970405503800535\n",
      "CPU times: user 9.49 s, sys: 251 ms, total: 9.74 s\n",
      "Wall time: 9.74 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Baseline: shallow decision tree trained on the projection currently held in X_pca.\n",
    "y = preprocessing.LabelEncoder().fit_transform(df.label)\n",
    "X_train, X_test, y_train, y_test = model_selection.train_test_split(X_pca, y, test_size = 0.3, random_state = 1)\n",
    "# Seed the tree as well as the split: scikit-learn permutes the features at every\n",
    "# split, so equally good splits can otherwise be resolved differently between runs.\n",
    "est = tree.DecisionTreeClassifier(max_depth=5, random_state=1)\n",
    "est.fit(X_train, y_train)\n",
    "print(\"Accuracy:\", est.score(X_test, y_test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 242,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.2368105, 0.7631895])"
      ]
     },
     "execution_count": 242,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# One importance score per input column of the fitted tree `est`\n",
    "# (the saved output has two entries, i.e. one per PCA component).\n",
    "est.feature_importances_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,\n",
       "  svd_solver='auto', tol=0.0, whiten=False)"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit a PCA that keeps every component so the full variance spectrum can be inspected.\n",
    "pca = decomposition.PCA().fit(X)\n",
    "pca"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1a167d2e10>"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlYAAAFpCAYAAABeYWb6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3Xd4HOW5/vH7UbMs914lF9wxxhjZpoTeTAkcEnoJIRByciAhCSEhPxJCwiEHQiCVhN4JGEgz4NAxJgGMC8ZN7k1ykeUmWbZV9/39MWtnEdrdWWml2ZW+n+vSpS3z7Dxavdq9NTP7jjnnBAAAgObLCLoBAACAtoJgBQAAkCQEKwAAgCQhWAEAACQJwQoAACBJCFYAAABJQrACAABIEoIVAABAkhCsAAAAkoRgBQAAkCRZQa24d+/ebujQoUGtHgAAwLf58+dvd871ibdcYMFq6NChmjdvXlCrBwAA8M3MNvhZjl2BAAAASUKwAgAASBKCFQAAQJIQrAAAAJKEYAUAAJAkBCsAAIAkIVgBAAAkCcEKAAAgSQhWAAAASRI3WJnZY2a2zcyWRLnfzOx3ZrbazBaZ2aTktwkAAJD6/GyxekLStBj3nylpZPjrOkl/an5bAAAA6SdusHLOzZa0M8Yi50l6ynk+ktTdzAYkq0EAAIB0kYyTMA+SVBxxvSR825YkPDYAAAc551QfcqoL/ed7KORU75xCzsk5qT70+cvelxQK17uIyyHnPe6B+0OhiMsuzrKNrDNyXS7KeiXJSXJOcvJuP/Dzebd99j6n8A0Rdd7liPsjag4s+J/bD1z6T+3B6597jiPva3jv5+tj/a6aqumVzXPymL46cXTfZj1GMoKVNXJbo8+JmV0nb3ehCgoKkrBqAEBrq60Pafe+Wu3aV6Ode2u0e1+Ndu6t1e79NaquDammPqTauvD3+pCq60KqrXeqqatXbb2LuC2kmvD32nqnulBI9fVeSDoYnuojQ1RIoaDecVOEhd9xTZKFr1j4dtN/7rSI5Q/cbhHv1g3fuM0+e4tFvRL1ps89RsN1JqoZpU2W3yMvJYJViaT8iOuDJW1ubEHn3EOSHpKkwsLCdv7nAQDBc85pb029tu+p1o691dpe6YWlyMB0IEDt2lejXXtrVFFVF/MxczIzlJ1pysnKUHZmhnKyMpQT/p598LupS262csLLZWVkKCvDlJlhysoMf8/IUIZFXve+Z5opM9O7nmHh7+HL3pcirkuZGSYzry7DvACQmeHFjcyDtVKGmSz8PcNMmRnesgcf88DlDC+sNHp/lMc6sN7I+yTvcQ5etv9cPxCcDoam5iQUtKpkBKsZkm4ws+clTZVU7pxjNyAABKQ+5LRzb422V1ZrR2WNduytVtmeau3YW6MdlV54OvB9e2W1qutCjT5Ox+xM9eyUox6dstUjL0cFPfO863k56tkpWz065ahnXo56hG/rnpetDlkZhAC0a3GDlZk9J+lESb3NrETSTyVlS5Jz7gFJMyWdJWm1pH2Srm6pZgGgvQuFnHbsrdGW8v3avLtKW8r3a2t5lTaXV2nL7v3aUl6l0ooq1TWyzyw709SrUwf16pyjXp076JC+ndW7cwf16pTjfe/sfT8QnjrmZAbwEwLpLW6wcs5dGud+J+n6pHUEAO3cnqpaFW3ZoxVbK7Rpd5W2lu/3glP5fpWWV6um/rNbmHKyMjSgW64GdMvV1GE91b9brvp3y/1PaOrSQb07dVDXjllsTQJaWDJ2BQIAmmjbniot3VyhZZsrtHRzuZZurtCGHfsO3p+daerXNVcDu3XUpIIeGtCt48EQNbC7d7lnpxwCE5AiCFYA0ApCIacNO/d9JkAt3Vyh7ZXVB5cp6JmnQwd21YVHDta4gV01dkBX9euSq4wMQhOQLghWAJBkzjkV79yvT4p36ZONu7V0c7mKtuxRZbX3abqsDNPIfl10wqg+OnRg
Vx06sKvGDuyqrrnZAXcOoLkIVgDQTBVVtVpUXK5PNu7SwuLdWli8Wzv21kjyPll36MCu+tKkQeEQ1U0j+3VWhywODAfaIoIVACSgrj6klaWVWli8+2CQWl1WeXA26hF9O+vkMX01saC7jsjvoVH9Oisr089pWQG0BQQrAIhhf029Plq3Q3PW7tTC4l1aVFKufTX1kqQeedk6oqCHvnj4QB1R0F0TBndXt47szgPaM4IVAERwzmlNWaVmrSjTeyvLNGfdTtXUhZSdaRo3oKsuKszXxPzuOqKguwp65vFpPACfQbAC0O7tqarVv1fv0HsryzR7ZZk27d4vydutd+VRQ3TCqD6aMqyncrM5LgpAbAQrAO2Oc07LtlQc3Cq1YMMu1YWcOnfI0rEjeun6k0bo+FG9NbhHXtCtAkgzBCsA7cL+mnq9WVSq91aUafaqMpXt8eaPGjegq75+/HCdOKqPJg3poWwONAfQDAQrAG3auu179cxHG/TivGJVVNWpe162jhvZRyeM6qPjR/ZW3665QbcIoA0hWAFoc+pDTm8Xlerpjzbo/VXblZVhmja+vy6fOkRThvVUJjOZA2ghBCsAbcb2ympNn1usP8/ZqE2796t/11x977RRumRKvvp2YcsUgJZHsAKQ1pxzWrBxt57+cL1mLt6qmvqQjjmkl35yzlidOrYfk3MCaFUEKwBpaV9NnWYs3KynPtygZVsq1KVDli6bWqArjirQiL5dgm4PQDtFsAKQVtZt36unP9ygF+cXa09Vncb076I7zx+v/5o4SJ068JIGIFi8CgFIC4tLyvXHWav12tKt4YPRB+grRw9R4ZAezH4OIGUQrACkLOecPlq7U3+ctVrvr9quLrlZuv7EEfrKMUM4GB1ASiJYAUg5oZDTO8u36f5Zq/XJxt3q3TlHP5w2RlccVaAuuZzkGEDqIlgBSBl19SG9uniL/vjuGq0o3aPBPTrqjvMO1YWF+ZynD0BaIFgBCFxVbb3+sqBED763Vht37tPIvp1130WH64uHD+QUMwDSCsEKQGAqq+v05zkb9PD761S2p1qH53fXj8/25p/KYHZ0AGmIYAWg1e3aW6PHP1ivJz9Yr/L9tTp2RC/95uKJOuaQXnzCD0BaI1gBaDU7Kqv18Pvr9NSH67Wvpl6nj+un/zlphCbmdw+6NQBICoIVgBa3vbJaD89eq6c+3KCqunqdM2GgvnXyCI3qxwzpANoWghWAFrNtT5Ueem+tnpmzQTV1IZ17+EDdcPIITjkDoM0iWAFIum0VVXrgvbV6ds4G1daH9F8TB+n6k0fokD6dg24NAFoUwQpA0pRWVOlPs9bouY83qi7kdP4Rg3T9SSM0rHenoFsDgFZBsALQbFvK9+uBWWv03NxihUJOX5rkBaohvQhUANoXghWAJtu0e7/+NGu1XphbopBzuuDIwbr+pBHK75kXdGsAEAiCFYCEba+s1h/eWa1n52yQJF1YmK9vnnAIgQpAu0ewAuBbZXWdHnl/rR6evVZVdSFdVJivG04eoUHdOwbdGgCkBIIVgLhq6kJ6fu5G/e7tVdpeWaOzDuuvm04fzaf8AKABghWAqEIhp1cXb9Gv3lihDTv2aeqwnnr4K2N0REGPoFsDgJREsALQqH+v3q67/rlcizeVa0z/Lnr86sk6cVQfzuUHADEQrAB8xpJN5br7teV6f9V2DereUfdddLjOmzhImRkEKgCIh2AFQJK0ccc+/eqNFZrx6Wb1yMvWj88eqyuOGqLc7MygWwOAtEGwAtq5yKkTMjNMN5w0QtedMFxdc7ODbg0A0g7BCmin9tfU69F/rdWfZq1RVV1IF0/O142njFS/rrlBtwYAaYtgBbQzoZDT3xdu0j2vr9CW8iqdcWg//WDaGKZOAIAkIFgB7chHa3fozleLtHhTuSYM7qbfXDxRU4f3CrotAGgzCFZAO7CmrFL/N3O53ioq1cBuufrNxRN17uEDlcEn/QAgqQhWQBu2c2+NfvvW
Sj07Z6NyszN18xmjdc0XhvFJPwBoIQQroA2qqq3Xkx+s1x/eXa19NfW6dEq+vnPqKPXu3CHo1gCgTSNYAW2Ic06vLNqiu19brpJd+3XymL760ZljNLJfl6BbA4B2gWAFtBHzN+zUHa8UaWHxbo0d0FXPXjtBx47oHXRbANCuEKyANFe8c5/u+udyvbp4i/p17aB7LpigL00azCloACAABCsgTe2pqtUfZ63Ro/9ap0wzfefUkbru+OHKy+HPGgCCwiswkGbqQ04vzivWr95Yqe2V1frSpEH6wRlj1L8bM6YDQNAIVkAa+WDNdt3xSpGKtlSocEgPPXpVoQ7P7x50WwCAMIIVkAbWb9+rX8ws0hvLSjWoe0f94bIjdPZhA2TGcVQAkEoIVkAKK99fqz+8s0pPfLBeOZkZTPAJACmOYAWkoLr6kJ6bW6xfv7lSu/bV6KIj83XTGaPUtwvHUQFAKvMVrMxsmqTfSsqU9Ihz7q4G9xdIelJS9/AytzjnZia5V6BdmL2yTP/76jKtLK3UUcN76ifnjNOhA7sF3RYAwIe4wcrMMiXdL+k0SSWS5prZDOfcsojFfizpBefcn8xsnKSZkoa2QL9Am7WmrFJ3vlqkd5Zv05BeeXrwyiN1+rh+HEcFAGnEzxarKZJWO+fWSpKZPS/pPEmRwcpJ6hq+3E3S5mQ2CbRlu/fV6Ldvr9LTH25Qx+xM/b+zxuiqY4aqQxbHUQFAuvETrAZJKo64XiJpaoNlbpf0hpl9S1InSacmpTugDaurD+nPH2/UfW+uVMX+Wl0ypUDfO40TJQNAOvMTrBrbD+EaXL9U0hPOuXvN7GhJT5vZeOdc6DMPZHadpOskqaCgoCn9Am3CeyvL9L+vLNOqbZU65pBe+sk54zR2QNf4hQCAlOYnWJVIyo+4Plif39V3jaRpkuSc+9DMciX1lrQtciHn3EOSHpKkwsLChuEMaPNWb6vUna8u07sryjSkV54euvJIncZxVADQZvgJVnMljTSzYZI2SbpE0mUNltko6RRJT5jZWEm5ksqS2SiQzhoeR3XrWWP1lWOGcBwVALQxcYOVc67OzG6Q9Lq8qRQec84tNbOfS5rnnJsh6SZJD5vZd+XtJvyqc44tUmj3OI4KANoXX/NYheekmtngttsiLi+TdGxyWwPS2+yVZbojfBzV0cN76bYvchwVALR1zLwOJFnD+ag4jgoA2g+CFZAk5ftq9du3V+mpD9czHxUAtFMEK6CZGh5HdfHkAt10OsdRAUB7RLACmqHhcVQ/OWecxg3kOCoAaK8IVkATcF4/AEBjCFZAAjiOCgAQC8EK8KGuPqTnwsdRlXMcFQAgCoIVEAfHUQEA/CJYAVGsKavUL14t0tscRwUA8IlgBTQQeRxVbnamfnTmGH31WI6jAgDER7ACwiKPo9q9v1aXTM7X904brT5dOI4KAOAPwQqQ9P4q7ziqlaWVOmp4T912zqEcRwUASBjBCu3a2rJK/WJmkd4q2qaCnnl64IojdcahHEcFAGgaghXapfL9tfrd26v05AfecVS3nDlGV3McFQCgmQhWaFfq6kN6bm6x7ntjhXbvr9XFhfm66XSOowIAJAfBCu3Gv1Zt1x2vLNOK0j2aOqynbvviOB06sFvQbQEA2hCCFdq8ddv36s5Xl+mtom3K79lRD1wxSWcc2p/jqAAASUewQptVvr9Wv397lZ78cL06ZGXqh9O846hyszmOCgDQMghWaHPq6kN6fm6x7ntzpXbtq9FFR+brpjNGqW+X3KBbAwC0cQQrtCn/Xu0dR7V86x5NGdZTt50zTuMHcRwVAKB1EKzQJqzfvld3zizSm8tKNbhHR/3p8kmaNp7jqAAArYtghbRWUVWrP7yzWo//e51yMjN08xmjdc0XhnEcFQAgEAQrpKX6kNP0ucW6940V2rmvRhdMGqybzxitvl05jgoAEByCFdLOh2t26OevLFPRlgpNHtpDT5wzRYcN5jgqAEDwCFZIGxt3
7NOdM5fp9aWlGtS9o/5w2RE6+7ABHEcFAEgZBCukvD1Vtbr/3TV67F/rlJVp+v7po3TtccM5jgoAkHIIVkhZ9SGnl+YX657XV2p7ZbW+PGmwfjBttPpxHBUAIEURrJCS5q7fqZ+9vFRLNlVoUkF3PXpVoQ7P7x50WwAAxESwQkrZtHu//m9mkV5ZtEUDuuXqt5dM1LmHD+Q4KgBAWiBYISXsq6nTA7PW6MHZa2Um3XjKSH3jhOHKy2GIAgDSB+9aCJRzTv9YuFl3/XO5tlZU6YuHD9QtZ47RoO4dg24NAICEEawQmIXFu/Wzl5fqk427ddigbvr9ZUdo8tCeQbcFAECTEazQ6korqnT3a8v11wWb1LtzB/3yggm6YNJgZWRwHBUAIL0RrNBqqmrr9ei/1un+d1errt7pv084RNefdIi65GYH3RoAAElBsEKLc87ptSVbdefMIpXs2q/Tx/XTrWeP1ZBenYJuDQCApCJYoUWtKt2j219eqn+v3qHR/bro2Wun6tgRvYNuCwCAFkGwQouoqKrVb95cpSc/XK9OOZn62bmH6vKpBcrKzAi6NQAAWgzBCkkVCjm9tKBEv3xtuXbsrdElkwt08xmj1bNTTtCtAQDQ4ghWSJpPi3frpzOWamHxbk0q6K7HvzpFhw3uFnRbAAC0GoIVmm17ZbXueW2FXphfrF6dOujeCw/X+UcMYvoEAEC7Q7BCk9XVh/TUhxv067dWan9Nvb5+3HB96+QRTJ8AAGi3CFZokg/WbNftM5ZqZWmljhvZWz/94qEa0bdz0G0BABAoghUSsmn3fv3i1SK9uniLBvfoqAevPFKnj+snM3b7AQBAsIIv1XX1enj2Wv3h3dVyTvruqaP0jROGKzc7M+jWAABIGQQrxDVrxTbdPmOp1u/YpzPH99etZ4/V4B55QbcFAEDKIVghqpJd+3THK8v0+tJSDe/dSU99bYqOH9Un6LYAAEhZBCt8TuRuP5PpB9NG65ovDFOHLHb7AQAQC8EKnzFrxTb97OVlWrd9r84c318/PmecBnXvGHRbAACkBYIVJLHbDwCAZCBYtXPVdfV65P11+v07qyRJN58xWtcex24/AACagmDVjr23sky3z1jKbj8AAJKEYNUObdq9X3e8vEyvLd2qYb076cmvTdEJ7PYDAKDZCFbtiHNOz87ZqDtfLZKTY7cfAABJ5itYmdk0Sb+VlCnpEefcXY0sc5Gk2yU5SZ865y5LYp9oph2V1frhXxbpraJtOm5kb/3flw5jkk8AAJIsbrAys0xJ90s6TVKJpLlmNsM5tyximZGSfiTpWOfcLjPr21INI3GzVmzT919cpIr9tbrtnHH66jFDlZHBuf0AAEg2P1uspkha7ZxbK0lm9ryk8yQti1jm65Lud87tkiTn3LZkN4rEVdXW6+7Xluvxf6/XqH6d9fQ1UzR2QNeg2wIAoM3yE6wGSSqOuF4iaWqDZUZJkpn9W97uwtudc68lpUM0yfKtFbrxuYVaUbpHXz1mqG45cwwnTAYAoIX5CVaN7TNyjTzOSEknShos6X0zG++c2/2ZBzK7TtJ1klRQUJBws4jPOacnPliv//vncnXNzdLjV0/WSaPZMwsAQGvwE6xKJOVHXB8saXMjy3zknKuVtM7MVsgLWnMjF3LOPSTpIUkqLCxsGM7QTNv2VOnmFxfpvZVlOnlMX/3yggnq3blD0G0BANBu+AlWcyWNNLNhkjZJukRSw0/8/V3SpZKeMLPe8nYNrk1mo4jtrWWl+sFfFmlvdZ3uOO9QXXHUEJlxgDoAAK0pbrByztWZ2Q2SXpd3/NRjzrmlZvZzSfOcczPC951uZssk1Uu62Tm3oyUbh2d/Tb3unLlMz3y0UWMHdNXvLpmokf26BN0WAADtkjkXzB65wsJCN2/evEDW3VYs2VSuG5//RGvK9urrxw3T988YzWSfAAC0ADOb75wrjLccM6+nIeecHv3XOt392nL1yMvR09dM0XEjOSUNAABB
I1ilmbr6kH7yj6V67uONOm1cP9395Qnq2Skn6LYAAIAIVmllb3WdbvjzAr27okz/c+Ih+v7po5lBHQCAFEKwShPbKqr0tSfnatnmCt15/nhdPnVI0C0BAIAGCFZpYGXpHl39+Fzt2lejR6+arJPGMOEnAACpiGCV4j5Ys13feHq+crMzNf26o3XY4G5BtwQAAKIgWKWwv3+ySTe/9KmG9OqkJ66erME98oJuCQAAxECwSkHOOf1x1hrd8/oKHTW8px68olDd8rKDbgsAAMRBsEox3nQKS/Tcx8U6b+JA/fKCCUz6CQBAmiBYpZDK6jpd/+wCvbeyTDecNEI3nT6K8/0BAJBGCFYporSiSlc/PlcrSvfoF+cfpsumFgTdEgAASBDBKgWsLN2jrz72sXbvr9UjVxXqpNFMpwAAQDoiWAXsg9Xb9Y1nvOkUXvjG0Ro/iOkUAABIVwSrAL386WZ974WFGtqrkx5nOgUAANIewSogMxdv0XemL9SRBT308FWF6taR6RQAAEh3BKsAvLWsVN9+7hNNzO+ux6+erE4d+DUAANAWZATdQHvz3soy/c+zC3TowK6EKgAA2hiCVSv6YM12XffUPI3o21lPfW2quuay+w8AgLaEYNVK5q7fqWuemKchvfL09DVTOEUNAABtEMGqFSws3q2rH5+rAd1y9cy1U9Wrc4egWwIAAC2AYNXClmwq11cenaOenXL0568fpb5dcoNuCQAAtBCCVQtavrVCVz46R11ys/Xnr09V/26EKgAA2jKCVQtZva1SVzwyRzlZGXr22qlM/gkAQDtAsGoB67fv1WUPfyTJ9Oy1R2lo705BtwQAAFoBwSrJSnbt0+WPzFFtfUjPXjtVI/p2DrolAADQSghWSbSlfL8ue3iO9lTV6ulrpmp0/y5BtwQAAFoR034nybY9Vbr84TnaubdGz1w7VeMHdQu6JQAA0MrYYpUEOyqrdfnDc7S1okpPXD1ZE/O7B90SAAAIAMGqmSqqanXlox9r4859euSqQhUO7Rl0SwAAICDsCmwG55z+318Xa0XpHj321ck65pDeQbcEAAACxBarZvjLgk16ZdEWfe+0UTphVJ+g2wEAAAEjWDXR+u179dN/LNHUYT313yccEnQ7AAAgBRCsmqC2PqQbpy9UZobp1xdPVGaGBd0SAABIARxj1QS/eWulPi3erT9ePkkDu3cMuh0AAJAi2GKVoI/W7tAfZ63RRYWDddZhA4JuBwAApBCCVQLK99Xqu9MXamivTvrpFw8Nuh0AAJBi2BXok3NOP/rbIpXtqdZf/+cYderAUwcAAD6LLVY+vTivRDMXb9VNp4/WhMHMrA4AAD6PYOXD2rJK3f7yUh09vJe+cfzwoNsBAAApimAVR01dSDc+v1A5WRm67+LDlcHUCgAAIAoOFIrjvjdXavGmcj1wxZEa0I2pFQAAQHRssYrhg9Xb9eDsNbp0Sr6mje8fdDsAACDFEayi2LW3Rt99YaGG9e6kn5wzLuh2AABAGiBYNcI5p1v+ukg799bod5ccobwc9pgCAID4CFaNeO7jYr2+tFQ3nzFa4wd1C7odAACQJghWDazeVqmfv7JUXxjRW9d+gakVAACAfwSrCNV19brx+U/UMTtT917E1AoAACAxHDwU4Vevr9DSzRV6+CuF6tc1N+h2AABAmmGLVdj7q8r08PvrdMVRBTptXL+g2wEAAGmIYCVpb3Wdvv/ipxrRt7NuPYupFQAAQNMQrCQ9OHutSiuqdfeXJ6hjTmbQ7QAAgDTV7oPV1vIqPTR7jc6eMEBHDukRdDsAACCNtftgde8bKxQKSbdMGxN0KwAAIM2162C1bHOFXlpQoquOGaL8nnlBtwMAANKcr2BlZtPMbIWZrTazW2Isd4GZOTMrTF6LLcM5p1/MLFLX3GzdcNLIoNsBAABtQNxgZWaZku6XdKakcZIuNbPPfXTOzLpI+rakOclusiW8t7JM/1q9Xd8+ZaS65WUH3Q4AAGgD
/GyxmiJptXNurXOuRtLzks5rZLk7JP1SUlUS+2sRdfUh/WJmkYb0ytOVRw0Juh0AANBG+AlWgyQVR1wvCd92kJkdISnfOfdKEntrMS/OL9HK0krdMm2McrLa9WFmAAAgifykisZOmOcO3mmWIenXkm6K+0Bm15nZPDObV1ZW5r/LJNpbXad731ipI4f00LTx/QPpAQAAtE1+glWJpPyI64MlbY643kXSeEmzzGy9pKMkzWjsAHbn3EPOuULnXGGfPn2a3nUzPDh7rbZXVuvWs8fKjJMsAwCA5PETrOZKGmlmw8wsR9IlkmYcuNM5V+6c6+2cG+qcGyrpI0nnOufmtUjHzRA5GeikAiYDBQAAyRU3WDnn6iTdIOl1SUWSXnDOLTWzn5vZuS3dYDIxGSgAAGhJWX4Wcs7NlDSzwW23RVn2xOa3lXwHJgO99gvDmAwUAAC0iHbxkTgmAwUAAK2hXQQrJgMFAACtoc0HKyYDBQAAraXNBysmAwUAAK2lTScNJgMFAACtqU0HKyYDBQAAranNBqsDk4Gew2SgAACglbTZYHXfm95koD9kMlAAANBK2mSwWra5Qi/OL9FVxwxhMlAAANBq2lywYjJQAAAQlDYXrJgMFAAABKVNBSsmAwUAAEFqU8HqJSYDBQAAAWoz6cM5p0f+tU4TBndjMlAAABCINhOsFmzcrdXbKnX51AImAwUAAIFoM8Fq+tyNysvJ1NkTBgbdCgAAaKfaRLCqrK7TK4u26JwJA9S5Q1bQ7QAAgHaqTQSrVxdt1r6ael08OT/oVgAAQDvWJoLV9LnFOqRPJ84JCAAAApX2wWr1tj1asHG3LpnMQesAACBYaR+sps8tVlaG6fxJg4JuBQAAtHNpHaxq6kL664JNOnVsP/Xu3CHodgAAQDuX1sHqneWl2rG3hoPWAQBASkjrYPX83GL175qr40f1CboVAACA9A1WW8r3a/bKMl1w5GBlZnDQOgAACF7aBquX5pUo5KSLCtkNCAAAUkNaBqtQyOmF+cU6engvFfTKC7odAAAASWkarD5au0PFO/dz0DoAAEgpaRmsps8rVpfcLE0b3z/oVgAAAA5Ku2BVvq9W/1yyVf81cZByszODbgcAAOCgtAtW//h0k2rqQuzzmqWuAAASfklEQVQGBAAAKSftgtXzHxfr0IFdNX5Qt6BbAQAA+Iy0ClZLNpVr2ZYKtlYBAICUlFbBavrcYuVkZei8wznhMgAASD1pE6yqauv194WbdOb4/uqWlx10OwAAAJ+TNsHqtSVbtaeqThcz0zoAAEhRaROsps8tVn7PjjpqeK+gWwEAAGhUWgSrDTv26sO1O3TRkfnK4ITLAAAgRaVFsHpxXokyTLqgcHDQrQAAAESV8sGqrj6kF+cX6/hRfTSgW8eg2wEAAIgq5YPV7FVlKq2o5qB1AACQ8lI+WE2fW6xenXJ0yth+QbcCAAAQU0oHq7I91Xq7aJu+NGmQcrJSulUAAIDUDlZ/+6REdSHHKWwAAEBaSNlg5ZzT9LnFmlTQXSP6dgm6HQAAgLhSNlgt2LhLa8r2srUKAACkjZQNVtPnFisvJ1NnTxgYdCsAAAC+pGSwqqyu0yuLtuicCQPUuUNW0O0AAAD4kpLB6pVPN2tfTT27AQEAQFpJyWA1fV6xDunTSZMKegTdCgAAgG8pF6xWle7RJxt36+LJ+TLjhMsAACB9mHMukBUXFha6efPmHbw+9JZXoy67/q6zW6MlAACARpnZfOdcYbzlUm6LFQAAQLryFazMbJqZrTCz1WZ2SyP3f8/MlpnZIjN728yGJL9VAACA1BY3WJlZpqT7JZ0paZykS81sXIPFPpFU6JybIOklSb9MdqMAAACpzs8WqymSVjvn1jrnaiQ9L+m8yAWcc+865/aFr34kaXBy2wQAAEh9foLVIEnFEddLwrdFc42kfzanKQAAgHTkZ1rzxuY8aPSjhGZ2haRCSSdEuf86SddJUkFBgc8WAQAA0oOfLVYl
kiKnQB8saXPDhczsVEm3SjrXOVfd2AM55x5yzhU65wr79OnTlH4BAABSlp9gNVfSSDMbZmY5ki6RNCNyATM7QtKD8kLVtuS3CQAAkPriBivnXJ2kGyS9LqlI0gvOuaVm9nMzOze82D2SOkt60cwWmtmMKA8HAADQZvk5xkrOuZmSZja47baIy6cmuS8AAIC0w8zrAAAASUKwAgAASBKCFQAAQJIQrAAAAJKEYAUAAJAkBCsAAIAkIVgBAAAkCcEKAAAgSQhWAAAASUKwAgAASBKCFQAAQJIQrAAAAJKEYAUAAJAkBCsAAIAkIVgBAAAkCcEKAAAgSQhWAAAASUKwAgAASBKCFQAAQJJkBd1As93eLc795a3TBwAAaPfYYgUAAJAkBCsAAIAkIVgBAAAkCcEKAAAgSQhWAAAASUKwAgAASBKCFQAAQJIQrAAAAJKEYAUAAJAkBCsAAIAkSf9T2jTDYU8eFvW+xVctbsVOAABAW8AWKwAAgCQhWAEAACQJwQoAACBJCFYAAABJQrACAABIknb9qcDmKBozNup9Y5cXtWInAAAgVbDFCgAAIEkIVgAAAElCsAIAAEgSghUAAECSEKwAAACShE8FtrL7//udmPdf/8DJrdQJAABINrZYAQAAJAlbrNLIvRefE/P+m6a/0kqdAACAxrDFCgAAIEnYYtVOlNzyftT7Bt91XCt2AgBA28UWKwAAgCRhixXiuv3225t039vvHBLzcU85eU3U+/q/uzDqfVtPmhjzcYfe8mrU+9bfdXaT6uLV6vZuMe4rj/m4hz15WNT7Fl+1OOp9sc5XKXHOSgAIAsEKaIdiTfsRb8qPWB+i4AMUANo7ghWAVhHrOD8p9rF+Td1qCgCtjWAFoE2LtUs61u5oAGgKDl4HAABIEl9brMxsmqTfSsqU9Ihz7q4G93eQ9JSkIyXtkHSxc259clsFgNYT6wMUUvwPUQBon+IGKzPLlHS/pNMklUiaa2YznHPLIha7RtIu59wIM7tE0t2SLm6JhgEg1TX1k6nNrQUQPD9brKZIWu2cWytJZva8pPMkRQar8yTdHr78kqQ/mJk551wSewUARBNryg8p7rQfAJLDT7AaJKk44nqJpKnRlnHO1ZlZuaRekrYno0kAQMuJNZea1PT51OLNpdacaT+AVGXxNiqZ2YWSznDOXRu+fqWkKc65b0UsszS8TEn4+prwMjsaPNZ1kq4LXx0taUWU1fZW00NZELXp1m9zaum3ZWvpt2Vr6Td1a+m3ZWvpt/m1Q5xzfeI+gnMu5pekoyW9HnH9R5J+1GCZ1yUdHb6cFW7K4j12jHXOS6fadOu3Pf2s9Eu/Qa+zPfXbnn5W+qXfaF9+pluYK2mkmQ0zsxxJl0ia0WCZGZKuCl++QNI7LtwhAABAexH3GCvnHTN1g7ytUpmSHnPOLTWzn8tLdjMkPSrpaTNbLWmnvPAFAADQrviax8o5N1PSzAa33RZxuUrShUns66E0q023fptTS78tW0u/LVtLv6lbS78tW0u/LV8rycfB6wAAAPCHU9oAAAAkScoFKzObZmYrzGy1md2SQN1jZrbNzJYkuL58M3vXzIrMbKmZ3ZhAba6ZfWxmn4Zrf5bgujPN7BMzeyXBuvVmttjMFprZvARru5vZS2a2PPwzH+2zbnR4fQe+KszsOz5rvxt+fpaY2XNmlptAvzeG65bGW19jY8DMeprZm2a2Kvy9RwK1F4bXGzKzwgTq7gk/v4vM7G9m1j2B2jvCdQvN7A0zG+i3NuK+75uZM7PePtd5u5ltivjdnpXIOs3sW+G/2aVm9ssEftbpEetcb2aNnkMmSu1EM/vowN+AmU3xWXe4mX0Y/vt52cy6Rllno68L8cZTjDo/YylabdzxFKM25niKVhdxf6yxFG2dccdTrPXGGk8x1hl3LMWo9TOWotXGHE8W5T3CvA+DzQmPo+nmfTCs4Tqj1d5g3vtjo7+XOLXPhp/bJeb9fWT7rHs0fNsi894/
OvtdZ8T9vzezygT7fcLM1kX8bif6rDMzu9PMVoZ/Z99OYJ3vR6xvs5n9vbGeY2ruxwqT+SXv4Pg1koZLypH0qaRxPmuPlzRJ0pIE1zlA0qTw5S6SViawTpPUOXw5W9IcSUclsO7vSfqzpFcS7Hm9pN5NfI6flHRt+HKOpO5N/D1tlTenR7xlB0laJ6lj+PoLkr7qcz3jJS2RlCfveMC3JI1MZAxI+qWkW8KXb5F0dwK1Y+XNtzZLUmECdadLygpfvjvBdXaNuPxtSQ8kMt4l5cv7oMmGxsZIlHXeLun7Pn4fjdWeFP69dAhf75tIvxH33yvptgTW+4akM8OXz5I0y2fdXEknhC9/TdIdUdbZ6OtCvPEUo87PWIpWG3c8xaiNOZ6i1fkcS9HWGXc8xaiNOZ5i9RtvLMVYp5+xFK025nhSlPcIea+Dl4Rvf0DSNxtZZ7TaIyQNVYz3gRi1Z4XvM0nPNVxvjLrIcXSfwn8DfmrD1wslPS2pMsF+n5B0QYxxFK3uannnL85obBzF6zdimb9I+kqssdzYV6ptsTp4+hznXI2kA6fPics5N1veJxIT4pzb4pxbEL68R1KRvDDgp9Y55w4k8Ozwl6+D1sxssKSzJT2SaM9NFf5v6nh5n+KUc67GObe7CQ91iqQ1zrkNPpfPktTRzLLkhaTNPuvGSvrIObfPOVcn6T1J50dbOMoYOE9emFT4+3/5rXXOFTnnok1iG6vujXC/kvSRpMEJ1FZEXO2kKOMpxnj/taQfNKEurii135R0l3OuOrzMtkTXa2Ym6SJ5L/R+a52kA1sHuqmRMRWlbrSk2eHLb0r6cpR1RntdiDmeotX5HEvRauOOpxi1McdTnNe/eGOpOa+d0Wpjjqd464w1lmLU+hlL0WpjjqcY7xEnyzv9mxTldSlarXPuE+fc+obL+6ydGb7PSfpYDcZSjLoK6eDz21GNjIloteadb/geeWMpoX5j/Yxx6r4p6efOuVB4uc+9LsVbp5l1kfd7SniLVaoFq8ZOn+PrDzUZzGyovP8G5iRQkxne7LxN0pvOOb+1v5E30EIJtil5v/w3zGy+ebPZ+zVcUpmkx83bBfmImXVqwvovUZQ3wc816twmSb+StFHSFknlzrk3fK5niaTjzayXmeXJ+28rP8Fe+znntoR72SKpb4L1zfU1Sf9MpCC8CbtY0uWSbou3fETduZI2Oec+TaxFSdIN4c38j1mU3aVRjJJ0XHi3xntmNrkJ6z5OUqlzblUCNd+RdE/4efqVvImL/Vgi6dzw5QvlYzw1eF3wPZ6a8nriozbueGpY63c8RdYlOpYa6df3eGpQ63s8RXmOfI2lBrUJjaUGtXHHU8P3CHl7ZXZHhOWo73PNeH+JWRveBXilpNf81pnZ4/L2VIyR9PsE1nmDpBkH/m6a0O+d4bH0azPr4LPuEEkXm7dr959mNjLR50jeP/FvN/jnxJdUC1bWyG2t8rHF8D7jv0j6TiJPpHOu3jk3UV7yn2Jm432s6xxJ25xz85vY7rHOuUmSzpR0vZkd77MuS96ukT85546QtFfe7gzfzDsW4FxJL/pcvoe8//KHSRooqZOZXeGn1jlXJG/Xx5vyXgA+lVQXsyiFmNmt8vp9NpE659ytzrn8cN0NPteVJ+lWJRDEIvxJ3gvRRHnh994EarMk9ZC3+f1mSS+E/6tNxKXyGdQjfFPSd8PP03cV3grrw9fk/c3Ml7dLpybWwk19XWhqXaxaP+OpsVo/4ymyLrwO32OpkXX6Hk+N1PoaTzGe37hjqZFa32Opkdq446nhe4S8LfGfW6yx9TXl/cVn7R8lzXbOve+3zjl3tbzX8CJJF/tc5/HyAmejQczHen8kL8hNltRT0g991nWQVOWcK5T0sKTHEljnAU15XZKUesGqRJ9N/IPlf7dRk4XT+18kPeuc+2tTHsN5u9RmSZrmY/FjJZ1rZuvl7e482cyeSWBdm8Pft0n6m7w/Vj9KJJVEpPKX5AWtRJwp
aYFzrtTn8qdKWuecK3PO1Ur6q6Rj/K7MOfeoc26Sc+54ebt1EtmqIUmlZjZAksLfG91VlWxmdpWkcyRdHt7s3hR/VpRdVY04RF54/TQ8rgZLWmBm/eMVOudKwy8wIXkvQn7Hk+SNqb+GN6t/LG8LbKMH1DYmvHv4S5KmJ7BOyTvTw4G/1Rfls2fn3HLn3OnOuSPlvWiuidFbY68LccdTc15PotX6GU8+1tvoeGqkzvdYamydfsdTlH7jjqcYz1HcsRSl1tdYivKz+h5PEe8RR0nqHu5X8vE+l+D7S8xaM/uppD7yjvFNaJ3OuXp5z2/M16WI2pMkjZC0OjyW8sybSNzXep23C9Y5b9fw44rxd96g3xJ5vyvJe4+c4HedkmRmvcLrejVWXTSpFqz8nD4nqcL/DT0qqcg5d1+CtX0s/AkdM+soL0Qsj1fnnPuRc26wc26ovJ/xHeecr604ZtbJvH2/Mm833unyNkfH5ZzbKqnYzEaHbzpF0jI/tRESTfEbJR1lZnnh5/oUef/x+GJmfcPfC+S9aCb6H0Tk6ZaukvSPBOsTZmbT5P1nda5zbl+CtZGbrM+Vj/EkSc65xc65vs65oeFxVSLvYNutPtY5IOLq+fI5nsL+Lu84BJnZKHkfiEjk5KenSlruwidwT8BmSSeEL58sn4E7YjxlSPqxvAOHG1su2utCzPHUzNeTRmv9jKcYtTHHU2N1fsdSjHXGHU8xnqeY4ynO8xtzLMWojTuWYvysMcdTlPeIIknvyjv9mxTldamp7y+xas3sWklnSLo0HHz91K0wsxERz8MXG+sjSu1851z/iLG0zzk3IoF+D/wTY/KOQ2v4ieRoz9HBcSTvd7vS7zrDd18o70NlVQ3rfHEJHu3e0l/yjqNZKS/535pA3XPyNjvXynshuMZn3RfkbYZdJGlh+Ossn7UTJH0Srl2iKJ9qivMYJyqBTwXKO07q0/DX0kSeo3D9REnzwj3/XVKPBGrzJO2Q1C3Bdf4sPGCXyPtkSIcEat+XF/4+lXRKomNAUi9Jb8t7sXxbUs8Eas8PX66WVKqIk5HHqVst71jBA+Mp2if7Gqv9S/h5WiTpZXkHICc83hXlE0NR1vm0pMXhdc6QNCCBfnMkPRPueYGkkxPpV96nfv67Cb/XL0iaHx4XcyQd6bPuRnmvLysl3SU1frJ4RXldiDeeYtT5GUvRauOOpxi1McdTtDqfYynaOuOOpxi1McdTrH7jjaUY6/QzlqLVxhxPivIeIe91/OPw7/ZFNfKaGKP22+GxVCcvFD6SQG2dvPfWAz/DbfHq5G2A+Xf4d7pE3i7lrn7X2WCZaJ8KjNbvOxHrfUbhT/H5qOsub2vTYkkfSjo8kX71ny1mvt/nIr+YeR0AACBJUm1XIAAAQNoiWAEAACQJwQoAACBJCFYAAABJQrACAABIEoIVAABAkhCsAAAAkoRgBQAAkCT/H0riMa2Q0ClkAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1a16737d30>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Scree plot: bars show the variance ratio of each component, the line its running total.\n",
    "fig, ax = plt.subplots(figsize = (10, 6))\n",
    "explained = pd.Series(pca.explained_variance_ratio_)\n",
    "explained.plot.bar(ax = ax, label = \"per component\")\n",
    "explained.cumsum().plot.line(ax = ax, label = \"cumulative\")\n",
    "ax.set(title = \"PCA explained variance\",\n",
    "       xlabel = \"principal component (0-based)\",\n",
    "       ylabel = \"explained variance ratio\")\n",
    "# Trailing semicolon keeps the Legend repr out of the cell output.\n",
    "ax.legend();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>cumsum</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>0.992789</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      cumsum\n",
       "24  0.992789"
      ]
     },
     "execution_count": 171,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# First components (0-based index) at which the running explained variance reaches 99%.\n",
    "(\n",
    "    pd.Series(np.cumsum(pca.explained_variance_ratio_), name = \"cumsum\")\n",
    "    .to_frame()\n",
    "    .loc[lambda frame: frame[\"cumsum\"] >= 0.99]\n",
    "    .head()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1min 17s, sys: 56.8 s, total: 2min 13s\n",
      "Wall time: 1min 54s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# 25 components: presumably chosen because the cumulative explained variance first\n",
    "# reaches 0.99 at 0-based component index 24 (see the query cell above) - confirm.\n",
    "# random_state pins the result when scikit-learn picks its randomized SVD solver.\n",
    "# NOTE: this rebinds `pca` and `X_pca`, replacing the earlier 2-component projection.\n",
    "pca = decomposition.PCA(n_components=25, random_state=1)\n",
    "X_pca = pca.fit_transform(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 15.6 s, sys: 892 ms, total: 16.5 s\n",
      "Wall time: 9.23 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Seeded so that cluster ids and sizes stay the same across kernel restarts; the\n",
    "# per-cluster cells below are only comparable when they come from the same fit.\n",
    "kmeans = cluster.MiniBatchKMeans(n_clusters=num_cluster, random_state=1)\n",
    "y_cluster = kmeans.fit_predict(X_pca)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "9     2262816\n",
       "3      324108\n",
       "26     284870\n",
       "19     279067\n",
       "8      266668\n",
       "2      265703\n",
       "4      251196\n",
       "1      207936\n",
       "16     128479\n",
       "28     127851\n",
       "11     109844\n",
       "22      96891\n",
       "18      91815\n",
       "5       60104\n",
       "21      44746\n",
       "24      31088\n",
       "0       28265\n",
       "6       18491\n",
       "25       8778\n",
       "17       4996\n",
       "27       2485\n",
       "23       1035\n",
       "7         651\n",
       "12        548\n",
       "dtype: int64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of rows assigned to each cluster id, largest cluster first.\n",
    "pd.Series(y_cluster).value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 224,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cluster:  0 \n",
      "smurf.     2806103\n",
      "normal.        304\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  1 \n",
      "neptune.      866160\n",
      "portsweep.       201\n",
      "normal.           65\n",
      "imap.              2\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  2 \n",
      "neptune.      204570\n",
      "portsweep.      7246\n",
      "normal.         1882\n",
      "ipsweep.         191\n",
      "satan.             3\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  3 \n",
      "normal.          2411\n",
      "warezclient.       19\n",
      "ftp_write.          2\n",
      "multihop.           2\n",
      "guess_passwd.       1\n",
      "warezmaster.        1\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  4 \n",
      "normal.             335198\n",
      "ipsweep.               822\n",
      "warezclient.           616\n",
      "back.                  426\n",
      "nmap.                   48\n",
      "pod.                    23\n",
      "warezmaster.            18\n",
      "buffer_overflow.        12\n",
      "satan.                   9\n",
      "imap.                    9\n",
      "neptune.                 9\n",
      "rootkit.                 3\n",
      "loadmodule.              3\n",
      "multihop.                2\n",
      "portsweep.               2\n",
      "ftp_write.               1\n",
      "smurf.                   1\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  5 \n",
      "normal.    1\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  6 \n",
      "normal.    26\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  7 \n",
      "normal.         19234\n",
      "neptune.         1242\n",
      "satan.            365\n",
      "portsweep.        107\n",
      "nmap.              18\n",
      "ipsweep.            6\n",
      "back.               6\n",
      "warezclient.        5\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  8 \n",
      "normal.        350\n",
      "loadmodule.      2\n",
      "multihop.        1\n",
      "spy.             1\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  9 \n",
      "normal.         288201\n",
      "back.             1573\n",
      "warezclient.        38\n",
      "satan.               7\n",
      "portsweep.           2\n",
      "rootkit.             2\n",
      "imap.                1\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  10 \n",
      "land.      21\n",
      "normal.     7\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  11 \n",
      "normal.    2\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  12 \n",
      "teardrop.    970\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  13 \n",
      "portsweep.    1\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  14 \n",
      "normal.             222\n",
      "buffer_overflow.     18\n",
      "phf.                  4\n",
      "perl.                 3\n",
      "rootkit.              2\n",
      "multihop.             1\n",
      "loadmodule.           1\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  15 \n",
      "normal.    1\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  16 \n",
      "portsweep.    4\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  17 \n",
      "normal.       4567\n",
      "ftp_write.       1\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  18 \n",
      "normal.         135\n",
      "ipsweep.          1\n",
      "warezmaster.      1\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  19 \n",
      "normal.         78097\n",
      "nmap.             991\n",
      "back.             190\n",
      "ipsweep.           59\n",
      "pod.               56\n",
      "neptune.           20\n",
      "warezclient.        8\n",
      "satan.              3\n",
      "portsweep.          2\n",
      "ftp_write.          1\n",
      "multihop.           1\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  20 \n",
      "normal.         1366\n",
      "warezclient.     288\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  21 \n",
      "normal.         51339\n",
      "ipsweep.           14\n",
      "back.               8\n",
      "portsweep.          2\n",
      "neptune.            1\n",
      "warezclient.        1\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  22 \n",
      "satan.        14007\n",
      "portsweep.     1822\n",
      "normal.           3\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  23 \n",
      "ipsweep.        11352\n",
      "normal.          1806\n",
      "pod.               94\n",
      "warezclient.       40\n",
      "neptune.            9\n",
      "nmap.               8\n",
      "portsweep.          4\n",
      "ftp_write.          2\n",
      "satan.              2\n",
      "loadmodule.         2\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  24 \n",
      "normal.          8\n",
      "satan.           1\n",
      "guess_passwd.    1\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  25 \n",
      "normal.    49\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  26 \n",
      "normal.       6157\n",
      "portsweep.     864\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  27 \n",
      "normal.          61\n",
      "guess_passwd.    51\n",
      "rootkit.          1\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  28 \n",
      "normal.       138963\n",
      "smurf.          1782\n",
      "satan.          1035\n",
      "nmap.            230\n",
      "pod.              80\n",
      "portsweep.         9\n",
      "teardrop.          7\n",
      "rootkit.           2\n",
      "spy.               1\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n",
      "Cluster:  29 \n",
      "normal.         42326\n",
      "nmap.            1021\n",
      "satan.            460\n",
      "portsweep.        147\n",
      "ipsweep.           36\n",
      "pod.               11\n",
      "neptune.            6\n",
      "warezclient.        5\n",
      "teardrop.           2\n",
      "ftp_write.          1\n",
      "loadmodule.         1\n",
      "Name: Category, dtype: int64\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# For every cluster id, print how many rows of each original Category it contains.\n",
    "for cluster_id in range(num_cluster):\n",
    "    members = y_cluster == cluster_id\n",
    "    category_counts = df.Category[members].value_counts()\n",
    "    print(\"Cluster: \", cluster_id, \"\")\n",
    "    print(category_counts)\n",
    "    print(\"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 178,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([689.23443365, 621.3949061 , 621.39031029, 440.30982984,\n",
       "       421.46888954, 406.4107485 , 393.79130488, 365.76066498,\n",
       "       345.07124035, 291.67571775, 278.80037374, 277.30149122,\n",
       "       277.21147369, 277.2062876 , 257.45249046, 232.46404824,\n",
       "       225.9500618 , 219.42014587, 214.72560286, 189.18878055,\n",
       "       184.23841188, 177.53093176, 176.07001054, 171.907339  ,\n",
       "       171.5358193 , 169.93502893, 168.03925748, 167.75202767,\n",
       "       167.60447674, 161.94584276, 161.81064075, 160.36691135,\n",
       "       155.14661059, 154.30681006, 153.93479222, 153.92025291,\n",
       "       149.13032213, 148.65314661, 145.30075983, 142.48819667,\n",
       "       138.94519965, 138.73436868, 138.69564646, 138.62820071,\n",
       "       137.87648857, 134.95281781, 133.36937893, 133.14764103,\n",
       "       132.80457913, 131.49850543, 130.45669101, 130.10272335,\n",
       "       129.55642448, 129.27921177, 128.89958802, 128.87403315,\n",
       "       128.86896531, 128.837752  , 127.09822912, 126.74857916,\n",
       "       123.80487929, 123.7788794 , 123.74921296, 123.72026691,\n",
       "       123.71962593, 123.71714541, 123.71498633, 123.71491887,\n",
       "       123.71399852, 123.71220955, 123.71117236, 123.71102159,\n",
       "       123.70958796, 123.70921104, 123.707626  , 123.70543123,\n",
       "       123.70424887, 123.70406444, 123.70299678, 123.70226455,\n",
       "       123.70051127, 123.61000927, 123.38736955, 122.44027687,\n",
       "       122.38480992, 122.37976551, 120.44710575, 120.13354292,\n",
       "       119.92699374, 119.84170685, 119.82401765, 119.21953847,\n",
       "       119.20255892, 116.99605438, 116.64435882, 116.4482475 ,\n",
       "       116.20164051, 115.84430421, 115.75866591, 115.20034272])"
      ]
     },
     "execution_count": 178,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "distances = np.zeros([df.shape[0]])\n",
    "for i in range(num_cluster):\n",
    "    centroid = kmeans.cluster_centers_[i]\n",
    "    distances[y_cluster==i] = np.sqrt(np.sum((X_pca[y_cluster==i] - centroid)**2, axis = 1))\n",
    "np.sort(distances)[::-1][:100]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 180,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(12327843.447282968, 12327843.447282847)"
      ]
     },
     "execution_count": 180,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.sum(distances ** 2), kmeans.inertia_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "Average distance of a point to its closest centroid to within each cluster"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 201,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1ac769d6a0>"
      ]
     },
     "execution_count": 201,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD/CAYAAAD/qh1PAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAFX9JREFUeJzt3X2wJXV95/H3lweJBAIDXB6cGRzKjAENy+jeJWw0K4IbR00yWCsbSJVOXHYnqcWHZN3aYDZbkF3Zxa2o2dQqqTEQB1dF1ocwleADAsaYRGDQcQYYWEZEmMwINyKolV1L8Lt/9O/Gw5m+c/s8zT33l/erqut2//rXfb6nT59P9+nzcCMzkSTV65ClLkCSNFkGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVe6wpS4A4IQTTsg1a9YsdRmStKzcddddf5uZM4v1m4qgX7NmDdu2bVvqMiRpWYmIb3Tp56UbSaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuWm4gtTvdZc9met7Q9d9ZqDXIkk1cEzekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMotGvQR8WMRcUdEfDUi7omI3y3tp0XE7RHxQER8NCKeVdqPKNO7y/w1k70LkqQD6XJG/33gvMw8C1gHrI+Ic4B3Au/JzLXAt4FLSv9LgG9n5k8C7yn9JElLZNGgz8b3yuThZUjgPOBjpX0LcEEZ31CmKfPPj4gYW8WSpIF0ukYfEYdGxHbgMeBm4GvAE5n5VOmyB1hZxlcCjwCU+U8Cx4+zaElSd52CPjOfzsx1wCrgbOCMtm7lb9vZe/Y3RMSmiNgWEdvm5ua61itJGtBAn7rJzCeAzwPnAMdGxPy/IlwF7C3je4DVAGX+McDjLevanJmzmTk7MzMzXPWSpEV1+dTNTEQcW8afDbwC2AXcBryudNsI3FjGt5ZpyvxbM3O/M3pJ0sHR5Z+DnwJsiYhDaQ4MN2Tmn0bEvcD1EfEO4CvANaX/NcAHI2I3zZn8RROoW5LU0aJBn5k7gBe1tD9Ic72+v/3/AReOpTpJ0sj8ZqwkVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SarcokEfEasj4raI2BUR90TEW0v7FRHxNxGxvQyv7lnm7RGxOyLuj4hXTvIOSJIO7LAOfZ4C3paZX46Io4G7IuLmMu89mfl7vZ0j4gXARcALgecAn4uI52fm0+MsXJLUzaJn9Jm5LzO/XMa/C+wCVh5gkQ3A9Zn5/cz8OrAbOHscxUqSBjfQNfqIWAO8CLi9NL0pInZExLURsaK0rQQe6VlsDwc+MEiSJqhz0EfEUcDHgd/IzO8AVwPPA9YB+4B3zXdtWTxb1rcpIrZFxLa5ubmBC5ckddMp6CPicJqQ/1BmfgIgMx/NzKcz84fA+/nR5Zk9wOqexVcBe/vXmZmbM3M2M2dnZmZGuQ+SpAPo8qmbAK4BdmXmu3vaT+np9lrg7jK+FbgoIo6IiNOAtcAd4ytZkjSILp+6eQnwemBnRGwvbb8NXBwR62guyzwE/BpAZt4TETcA99J8YudSP3EjSUtn0aDPzC/Sft39pgMscyVw5Qh1SZLGxG/GSlLlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klS5RYM+IlZHxG0RsSsi7omIt5b24yLi5oh4oPxdUdojIv4gInZH
xI6IePGk74QkaWFdzuifAt6WmWcA5wCXRsQLgMuAWzJzLXBLmQZ4FbC2DJuAq8detSSps0WDPjP3ZeaXy/h3gV3ASmADsKV02wJcUMY3ANdl40vAsRFxytgrlyR1MtA1+ohYA7wIuB04KTP3QXMwAE4s3VYCj/Qstqe0SZKWQOegj4ijgI8Dv5GZ3zlQ15a2bFnfpojYFhHb5ubmupYhSRpQp6CPiMNpQv5DmfmJ0vzo/CWZ8vex0r4HWN2z+Cpgb/86M3NzZs5m5uzMzMyw9UuSFtHlUzcBXAPsysx398zaCmws4xuBG3va31A+fXMO8OT8JR5J0sF3WIc+LwFeD+yMiO2l7beBq4AbIuIS4GHgwjLvJuDVwG7g74A3jrViSdJAFg36zPwi7dfdAc5v6Z/ApSPWJUkaE78ZK0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKrdo0EfEtRHxWETc3dN2RUT8TURsL8Ore+a9PSJ2R8T9EfHKSRUuSeqmyxn9B4D1Le3vycx1ZbgJICJeAFwEvLAs876IOHRcxUqSBrdo0GfmF4DHO65vA3B9Zn4/M78O7AbOHqE+SdKIRrlG/6aI2FEu7awobSuBR3r67CltkqQlctiQy10N/Bcgy993Af8KiJa+2baCiNgEbAI49dRTh6viimMWaH9yuPVJUoWGOqPPzEcz8+nM/CHwfn50eWYPsLqn6ypg7wLr2JyZs5k5OzMzM0wZkqQOhgr6iDilZ/K1wPwncrYCF0XEERFxGrAWuGO0EiVJo1j00k1EfAQ4FzghIvYAlwPnRsQ6mssyDwG/BpCZ90TEDcC9wFPApZn59GRKlyR1sWjQZ+bFLc3XHKD/lcCVoxQlSRofvxkrSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIqZ9BLUuUWDfqIuDYiHouIu3vajouImyPigfJ3RWmPiPiDiNgdETsi4sWTLF6StLguZ/QfANb3tV0G3JKZa4FbyjTAq4C1ZdgEXD2eMiVJw1o06DPzC8Djfc0bgC1lfAtwQU/7ddn4EnBsRJwyrmIlSYM7bMjlTsrMfQCZuS8iTiztK4FHevrtKW37hi9xPM7ccmZr+86NOw9yJZJ0cI37zdhoacvWjhGbImJbRGybm5sbcxmSpHnDBv2j85dkyt/HSvseYHVPv1XA3rYVZObmzJzNzNmZmZkhy5AkLWbYoN8KbCzjG4Ebe9rfUD59cw7w5PwlHknS0lj0Gn1EfAQ4FzghIvYAlwNXATdExCXAw8CFpftNwKuB3cDfAW+cQM2SpAEsGvSZefECs85v6ZvApaMWJUkaH78ZK0mVM+glqXIGvSRVzqCXpMoN+83Yqu06/YzW9jPu23WQK5Gk0XlGL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDmDXpIq588Uj8F7f/3W/dou/cPzlqASSdqfZ/SSVDmDXpIqZ9BLUuVGukYfEQ8B3wWeBp7KzNmIOA74KLAGeAj4l5n57dHKlCQNaxxn9C/PzHWZOVumLwNuycy1wC1lWpK0RCZx6WYDsKWMbwEumMBtSJI6GjXoE/hsRNwVEZtK20mZuQ+g/D2xbcGI2BQR2yJi29zc3IhlSJIWMurn6F+SmXsj4kTg5oi4r+uCmbkZ2AwwOzubI9YhSVrASGf0mbm3/H0M+CRwNvBoRJwCUP4+NmqRkqThDR30EfHjEXH0/Djw88DdwFZgY+m2Ebhx1CIlScMb5dLNScAnI2J+PR/OzE9H
xJ3ADRFxCfAwcOHoZUqShjV00Gfmg8BZLe3fAs4fpShJ0vj4zVhJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SarcqL9eqQG965d/Yb+2t330T5egEkn/UHhGL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5fwc/RTbc9lftLavuurnDnIlkpYzz+glqXIGvSRVzks3lbjiiis6t99y6/Na+55/3tfGWJGkaWHQ64BOvm17a/s3X77uIFciaVgTC/qIWA/8D+BQ4I8y86pJ3Zamx5rL/my/toeues3IfSUNbyJBHxGHAu8F/jmwB7gzIrZm5r2TuD39A3DFMS1tT7Z2PXPLma3tOzfu3K9t1+lntPY9475d3WuTptykzujPBnZn5oMAEXE9sAEw6LVsvffXb21tv/QPz2ttH+Qnqds+SrvQx2jb3ndZ6D0aCSYX9CuBR3qm9wA/M6HbkrSAQd54H+T9mLbLbjCmy3QDvHpTN5GZ419pxIXAKzPzX5fp1wNnZ+abe/psAjaVyZ8C7m9Z1QnA33a82eXWd1rqmIa+01LHNPSdljqWW99pqeNg931uZs4sunRmjn0A/inwmZ7ptwNvH2I922rtOy11TEPfaaljGvpOSx3Lre+01DENfduGSX1h6k5gbUScFhHPAi4Ctk7otiRJBzCRa/SZ+VREvAn4DM3HK6/NzHsmcVuSpAOb2OfoM/Mm4KYRV7O54r7TUsc09J2WOqah77TUsdz6Tksd09B3PxN5M1aSND38UTNJqpxBL0mV80fNNLV6PrG1NzM/FxG/AvwssAvYnJk/aFnmbCAz886IeAGwHrivvGekCYiI6zLzDUtdx7hExOk03+RfCSSwF9iamcv2dzGquUYfESdm5mMt7T8D7MrM70TEs4HLgBfT/BzDf81Mv3LXUUQcn5nfOoi39yGak5EjgSeAo4BPAOfT7Lsb+/pfDryqLHMzzbexPw+8guZ7HVf29H0L8MnM7P0GtxYREf0fkw7g5cCtAJn5SyOs+3nAa4HVwFPAA8BH2p6jJYxXArdn5vd62tdn5qdHqOG3gIuB62m+0Q+wiuaE4/pcgh9njIiX0vyszN2Z+dmhVjLKh/AnPQDHL9B+XN9wPPAQsAI4rq/vPcBhZXwz8PvAS4HLgU/09V3fM34McA2wA/gwcNII9+PLwO8AzxthHSeOYXseA1wF3Ad8qwy7StuxfX2vAk4o47PAg8Bu4BvAy0ao4WTgapofvTseuALYCdwAnNLXd0f5exjwKHBomY75eX39d9J8nPdI4DvAT5T2Z/f3B56kOVP7C+DfAjMHYX/+VN/0UcB/Lvvok8Ac8CXgVyddS18dPwH8N+CDwK/0zXtfy778v4BzgZeVv/vK+Mv6+s4Ct5X+q2kOvk/SfM/mRX1931Lm/w7wV8D7gCtpTsjObel7P/AnNM/7Db31DXjf39g3/X+Aw1v6PQt44CA9Hnf0jP8bYDtNXv0lcNlQ6zyYO9Qid65zsAA/BL7eN/yg/H2wr++uhXYCYHv/Ttwz/kfAO4DnAr8J/ElLzZ125FLX7wEPA3eU9T3nANtikAPZIOH9GeC3gJN72k4ubTf39d3ZM34b8E/K+PMZ/BuLn+oZ/zTwZppXVjvKbZ9a2m7sW+7u8gRbAXx3/r4DP9b7uPb0/0rb+AKP9Vdo3qP6eZoD+lypbSNwdP9+QccDNc2rxbbhHwP7+vreCPwqzRnjvwP+E7AW2ELzarN/3YMEcueDCPDxsr9cQPPFxo8DRyzwnDmk7L83A+tK24MLbIs7aF5hXUzz21evK+3nA3/dv7/xowP5kcDny/ipLY/lTuCoMr4G2Aa8te1x7/B4Pdw3fR/Nzwr093sucH9f2yAnLZ1PIvv24zspJyHAj9PzvBzofg6z0CQGBggW4N/TPCnP7Gn7+gLr/d+Uozbwx8Bsz3rv7OvbG/T9wbC9Zd2dduS+9f4czdnKN8v93NSy3kEO
ZIOE9/39t7XQvLLDz78S+tJCj1VPW6eA69uJ+59k/dv8N2kO+N+gOYu7BXh/eTJd3lLD7cCRZfyQnvZj2D+w+qcPB34J+Agw1zev84EaeJrmMsZtLcP/7ev71b7pO+drp3lfoX/dgwRy54NIy3b/jzRnj8f3r7enzyqa59b/7H8cOz7WbeE9f19WAHf1zLu7r++9fdNH0eTBu/vvS5m/Y4FhJ/D9vr7raU4wP0VzBWBzWfduesK69B3kpKXzSSTw1bINjmf/7BvoQPb3yw2z0CQGBg+W+R3t3cDRLHxWcQzwAeBrNEHwA5rw+HPgrL6+e8qT4m2lT/TuLMPuyG1PFppLDOuBP26ZN8iBbJDw/izwH+g5gwBOKjvo5/r6vrn0P4/mTOX3gX8G/C7wwZbb6hRw9IQb8I4Oj/NzKKEKHAu8juYH8tru7xELtJ/Quy37H5+W/s/um+58oKZ5FbJ2gfU+0jf9V8BLy/gv8szfh9rvcWWAQGaAgwjNK8BD+to20rwa+MZC26n0ew0trz7KvL+mecV0Ic3B+oLS/jL2D7C30oTlZposmD85mwG+0Nf3VsqriZ62w4DrgKdb6ngUWEcTrL3DGpo3+vv7HwKcA/yLsr+dQ3m1sdA+xOInLZ1PImlevT9IObGjnMTRHND2O5B1GQZeYFIDAwZLz3K/SPOS9JuLrP9o4CyaM8zW6+0018F6h/mXTCcD1w27I9O8iTPo9uh6IBskvFcA7yxPpG8Dj9M8yd9J3yWh0v9c4KM0lzl20nzTeRPt1zA7BRzN5YSjWvr8JPCxg7i/PX+Avp0P1CUYfmqB9VzQN30WzSuEJ4Avzi9HE25vaVm+cyAzwEEE+O/AK1pubz0jXJcu9+8zNGfHp9P8x7knSr0/29L/hWX7nd7huXHyAvNe0tJ2zfy2aJn34RHuX+eTFgY8iVzg9o4EThuq1mHv5CSGAwTLYS19T6e5RHIUzZttPz2/c45Yw9+vt699v/UOsiMPst6++Qc8kPHM8H6cZ4b3igXu3ys63r9BtsUgATfUtljC/XKgA/WA2+2MAR6PzoEM/COeeRB5fmlf6CCyUM2vGnHbnbGcHusB71vnkxYGPIkce61LvbE6btD+d8bH9q5733rfPK719tY86np55oHsjcPUMOh2G+c2Hue2mLZhDNv4vnFv4yFqnshjMs77t9yGUR6PidSz1Buk44bov/41tnfdJ7Xe3pontd5JbrflsC2mYZjGbTzJmgesqarH+mA9HpMYpuabsRGxY6FZNNecex2a5UsSmflQRJwLfCwinlv6D2ug9Q5Q86TWO8ntNhXbYhosw228LJ9Py80EH4+xm5qgp7mzr6R5k7BX0Lyx1OubEbEuM7cDZOb3IuIXgGuBM0eoYdD1dq15UusdtO8gdUzLtpgGy20bT7LmQSzHx3oQk3o8xm+pX+L0vHzp/M44A77rPkANE3k3f1LrneR2m5ZtMQ3DctvGk6x5wO227B7rCe4XE/nkT9ehmt+6kSS182eKJalyBr0kVc6gl6TKGfSSVDmDXpIq9/8BeVBOyvGzBhMAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1ac8149a90>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "cluster_avg_distances = []\n",
    "for i in range(num_cluster):\n",
    "    cluster_avg_distances.append(np.mean(distances[y_cluster == i]))\n",
    "pd.Series(cluster_avg_distances).sort_values(ascending=False).plot.bar()  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 203,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1ac87e8588>"
      ]
     },
     "execution_count": 203,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD/CAYAAAD/qh1PAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAF35JREFUeJzt3X+wX3V95/HnWyJWRAmEy48mYKxGwV2WSLM0rVqRuC0/rGFnYSrdKSnDbtZZFq3aqXG3O6Y7dos7W7XMWpxUtMGqgKglo2hNA2ztVihBkKCJS0AgmQC5FYh12XbFvveP87nDl+/93txz7v1+77358HzMnPme8zmf7/m+z7nn+zrne74/bmQmkqR6vWC+C5AkjZZBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlVs03wUAHHvssbl8+fL5LkOSDil33XXX32bm2HT9FkTQL1++nO3bt893GZJ0SImIh9v089KNJFVu2qCPiNdExD09ww8j4jcj4piI2BoR95fbo0v/iIirImJ3RNwbEWeMfjUkSVOZNugz83uZuTIzVwI/CzwNfAnYAGzLzBXAtjINcC6wogzrgatHUbgkqZ2ul27WAA9k5sPAWmBzad8MXFDG1wLXZuN2YHFEnDiUaiVJnXUN+rcDnyvjx2fmowDl9rjSvhTY03OfvaVNkjQPWgd9RBwOvA34/HRdB7RN+jdWEbE+IrZHxPbx8fG2ZUiSOupyRn8u8K3MfLxMPz5xSabc7i/te4GTeu63DNjXv7DM3JSZqzJz1djYtB8DlSTNUJegv5hnL9sAbAHWlfF1wE097ZeUT9+sBg5MXOKRJM29Vl+YiogjgH8B/Lue5iuBGyLiMuAR4KLSfjNwHrCb5hM6l3YpaPmGr0xqe+jK87ssQpLUo1XQZ+bTwJK+th/QfAqnv28Clw+lOknSrPnNWEmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlWv1rwQXrI1HTdF+YG7rkKQFzDN6SaqcQS9JlWsV9BGxOCJujIhdEbEzIn4+Io6JiK0RcX+5Pbr0jYi4KiJ2R8S9EXHGaFdBknQwbc/o/xD4WmaeApwO7AQ2ANsycwWwrUwDnAusKMN64OqhVixJ6mTaoI+IlwG/CFwDkJn/LzOfAtYCm0u3zcAFZXwtcG02bgcWR8SJQ69cktRKmzP6nwHGgU9FxN0R8YmIeAlwfGY+ClBujyv9lwJ7eu6/t7RJkuZBm6BfBJwBXJ2ZrwP+D89ephkkBrTlpE4R6yNie0RsHx8fb1WsJKm7NkG/F9ibmXeU6Rtpgv/xiUsy5XZ/T/+Teu6/DNjXv9DM3JSZqzJz1djY2EzrlyRNY9qgz8zHgD0R8ZrStAb4LrAFWFfa1gE3lfEtwCXl0zergQMTl3gkSXOv7TdjrwA+ExGHAw8Cl9IcJG6IiMuAR4CLSt+bgfOA3cDTpa8kaZ60CvrMvAdYNWDWmgF9E7h8lnVJkobEb8ZKUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVa5V0EfEQxGxIyLuiYjtpe2YiNgaEfeX26NLe0TEVRGxOyLujYgzRrkCkqSD63JG/+bMXJmZq8r0BmBbZq4AtpVpgHOBFWVYD1w9rGIlSd3N5tLNWmBzGd8MXNDTfm02bgcWR8SJs3gcSdIstA36BL4eEXdFxPrSdnxmPgpQbo8r7UuBPT333VvaJEnzYFHLfq/PzH0RcRywNSJ2HaRvDGjLSZ2aA8Z6gJNPPrllGZKkrlqd0WfmvnK7H/gS
cCbw+MQlmXK7v3TfC5zUc/dlwL4By9yUmasyc9XY2NjM10CSdFDTBn1EvCQiXjoxDvwScB+wBVhXuq0DbirjW4BLyqdvVgMHJi7xSJLmXptLN8cDX4qIif6fzcyvRcSdwA0RcRnwCHBR6X8zcB6wG3gauHToVUuSWps26DPzQeD0Ae0/ANYMaE/g8qFUJ0maNb8ZK0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVLm2/2HqkHfa5tMGtu9Yt2OOK5GkueUZvSRVzqCXpMoZ9JJUOYNekir3vHkztoudp5w6sP3UXTvnuBJJmj3P6CWpcga9JFWuddBHxGERcXdEfLlMvyIi7oiI+yPi+og4vLS/qEzvLvOXj6Z0SVIbXc7o3wX0XqT+EPCRzFwBPAlcVtovA57MzFcBHyn9JEnzpFXQR8Qy4HzgE2U6gLOBG0uXzcAFZXxtmabMX1P6S5LmQdsz+o8Cvw38Y5leAjyVmc+U6b3A0jK+FNgDUOYfKP0lSfNg2qCPiLcC+zPzrt7mAV2zxbze5a6PiO0RsX18fLxVsZKk7tqc0b8eeFtEPARcR3PJ5qPA4oiY+Bz+MmBfGd8LnARQ5h8FPNG/0MzclJmrMnPV2NjYrFZCkjS1ab8wlZnvB94PEBFnAb+Vmf86Ij4PXEgT/uuAm8pdtpTpb5b5t2TmpDP6mnzsHbdMarv842fPQyWSNNlsPkf/PuA9EbGb5hr8NaX9GmBJaX8PsGF2JUqSZqPTTyBk5m3AbWX8QeDMAX3+HrhoCLVJkobAb8ZKUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVW7aoI+In4qIv4mIb0fEdyLid0v7KyLijoi4PyKuj4jDS/uLyvTuMn/5aFdBknQwbc7o/wE4OzNPB1YC50TEauBDwEcycwXwJHBZ6X8Z8GRmvgr4SOknSZon0wZ9Nn5UJl9YhgTOBm4s7ZuBC8r42jJNmb8mImJoFUuSOml1jT4iDouIe4D9wFbgAeCpzHymdNkLLC3jS4E9AGX+AWDJMIuWJLXXKugz8yeZuRJYBpwJnDqoW7kddPae/Q0RsT4itkfE9vHx8bb1SpI66vSpm8x8CrgNWA0sjohFZdYyYF8Z3wucBFDmHwU8MWBZmzJzVWauGhsbm1n1kqRptfnUzVhELC7jLwbeAuwEbgUuLN3WATeV8S1lmjL/lsycdEYvSZobi6bvwonA5og4jObAcENmfjkivgtcFxEfBO4Grin9rwE+HRG7ac7k3z6CuiVJLU0b9Jl5L/C6Ae0P0lyv72//e+CioVQnSZo1vxkrSZVrc+lGQ/QHv/rWSW3vvf7L81CJpOcLg34B27vhG5Pall35xoF9N27c2Lp92y2vHNh3zdkPtK5N0qHDSzeSVDmDXpIqZ9BLUuUMekmqnEEvSZUz6CWpcga9JFXOoJekyhn0klQ5g16SKmfQS1LlDHpJqpxBL0mVM+glqXIGvSRVzqCXpMr5j0d0UCfces/A9sfevHKOK5E0U57RS1Llpg36iDgpIm6NiJ0R8Z2IeFdpPyYitkbE/eX26NIeEXFVROyOiHsj4oxRr4QkaWptzuifAd6bmacCq4HLI+K1wAZgW2auALaVaYBzgRVlWA9cPfSqJUmtTRv0mfloZn6rjP8dsBNYCqwFNpdum4ELyvha4Nps3A4sjogTh165JKmVTtfoI2I58DrgDuD4zHwUmoMBcFzpthTY03O3vaVNkjQPWgd9RBwJfAH4zcz84cG6DmjLActbHxHbI2L7+Ph42zIkSR21CvqIeCFNyH8mM79Ymh+fuCRTbveX9r3AST13Xwbs619mZm7KzFWZuWpsbGym9UuSptHmUzcBXAPszMwP
98zaAqwr4+uAm3raLymfvlkNHJi4xCNJmnttvjD1euDXgR0RMfHtmf8IXAncEBGXAY8AF5V5NwPnAbuBp4FLh1qxFqzlG74ysP2hK8+f40ok9Zo26DPzrxh83R1gzYD+CVw+y7okSUPiN2MlqXIGvSRVzqCXpMoZ9JJUOX+mWPNm0Kd0/ISONHye0UtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TKGfSSVDk/R69Dw8ajBrQdmPs6pEOQQa/qnLb5tIHtO9btmNS285RTB/Y9ddfOodYkzScv3UhS5Qx6SaqcQS9JlTPoJalyvhkrtfSxd9wysP3yj589x5VI3XhGL0mVM+glqXLTBn1EfDIi9kfEfT1tx0TE1oi4v9weXdojIq6KiN0RcW9EnDHK4iVJ02tzRv8nwDl9bRuAbZm5AthWpgHOBVaUYT1w9XDKlCTN1LRBn5l/CTzR17wW2FzGNwMX9LRfm43bgcURceKwipUkdTfTa/THZ+ajAOX2uNK+FNjT029vaZMkzZNhvxkbA9pyYMeI9RGxPSK2j4+PD7kMSdKEmQb94xOXZMrt/tK+Fzipp98yYN+gBWTmpsxclZmrxsbGZliGJGk6Mw36LcC6Mr4OuKmn/ZLy6ZvVwIGJSzySpPkx7TdjI+JzwFnAsRGxF/gAcCVwQ0RcBjwCXFS63wycB+wGngYuHUHNkqQOpg36zLx4illrBvRN4PLZFiUd6v7gV986sP291395jiuR/GasJFXPoJekyhn0klQ5g16SKmfQS1LlDHpJqpz/YUpaAPZu+MaktmVXvnEeKlGNPKOXpMoZ9JJUOYNekipn0EtS5Qx6SaqcQS9JlTPoJalyBr0kVc6gl6TK+c1Y6RCzcePGVm3SBM/oJalyBr0kVc5LN1LFtt3yyklta85+YB4q0Xwy6CUBcMKt9wxsf+zNKye1Ld/wlYF9H7ry/KHWpOEYSdBHxDnAHwKHAZ/IzCtH8TiSFr7OB4WNRw1oOzDEip5/hn6NPiIOAz4GnAu8Frg4Il477MeRJLUzijP6M4HdmfkgQERcB6wFvjuCx5L0PHba5tMmte1Yt2Ng352nnDqp7dRdO4de00I0iqBfCuzpmd4L/NwIHkeSRuJj77hlYPvlHz97jisZjsjM4S4w4iLglzPz35TpXwfOzMwr+vqtB9aXydcA3xuwuGOBv2350DX3XSh1LIS+C6WOhdB3odRxqPVdKHUMo+/LM3Ns2ntn5lAH4OeBP++Zfj/w/hkua7t9F04dC6HvQqljIfRdKHUcan0XSh2jXL/+YRRfmLoTWBERr4iIw4G3A1tG8DiSpBaGfo0+M5+JiP8A/DnNxys/mZnfGfbjSJLaGcnn6DPzZuDmISxqk30XVB0Loe9CqWMh9F0odRxqfRdKHaNcv+cY+puxkqSFxR81k6TKGfSSVDl/1ExzqueTWPsy8y8i4teAXwB2Apsy88fT3P/azLxkDkpdcCLiTCAz887ysyLnALvKe2Iagog4heab/EuBBPYBWzLzkP4KbTXX6CNiSWb+YATLPS4z9w9o/zlgZ2b+MCJeDGwAzqD5qYf/mpn+CtMAEfEZmhOMI4CngCOBLwJraPbHdT19+z+WG8CbgVsAMvNtfct+JfAvgZOAZ4D7gc/V8LeIiA/Q/H7UImArzbfNbwPeQvO9ld+bxbLfCXwpM/dM23mESsguBe7IzB/1tJ+TmV+bg8d/H3AxcB3NN/oBltGcmFyX8/TjjBHxBpqflrkvM78+o4XM5kP4ox6A46ZovxI4toyvAh4EdgMPA29qsdwlU7Qf0zcsAR4CjgaO6ev7HWBRGd8EfBR4A/AB4It9fc/pGT8KuAa4F/gscPwst9EJwNU0PyS3BNgI7ABuAE7s67sKuBX4U5ow3AocoPnuw+tmUcO3gN8BXtmi773ldhHwOHBYmY6JeX3L/VPgLOBN5fbRMv6mvr7vLOvzO8BfA38E/B7NgfesAXUcCfyX8nc8AIwDtwO/
Mcu/x1Fl/9wF/KAMO0vb4g7LubRvegfNx5WPAH4IvKy0v7h/u82g5gM0Z67fAP49MDbD5Xy1b/plwO8DnwZ+rW/eHw34+30P+LPynFvbux/MZv061P+/gRcOaD8cuH8uaiiP9zc94/8WuKfkyv8CNsxomXNVfIuV6xKyO3rGbwX+eRl/NX3fIKPDQQH4R+D7fcOPy+2DfX13TrUjAvf0TX+rZ/wTwAeBlwPvBv5swLboEpxfA66geUVxL/A+4OTSdlP/DkRzVngxze8RXVja1wDf7Ovb+qBQts9/Bx4pj/Fu4KenqPe+8sQ5Gvi7ib8t8FO927S0vaAsayuwsrQ9OMVyd/DsQeMI4LYyfjJw94D+NwG/QXPG9h7gPwMrgM00r8h6+7Y+KNB8f+R9wAk9bSeUtq0dng+P9E3fPWh80P5W2lofcIC7y7b+JZqTkPGyX60DXtrX94wphp8FHu3r+4XyeBfQfGnyC8CLpnjO7ACOLOPLge3Auwatb2lrfRCZZjt/tWd8F81PCvT3eTnwvQHtrU/g6HZC1vu3vpNy4AVeQk/2dRk632FUA91CdhfPnk3f3r/DTDXN9AeF3yo7+Gk9bd+fot7PU866gE8Bq3qWe2df396g7z8IDHqSdgnO3p2iPxz6H+tgffvDo8tBoXf93khzNv1Y2d7r+/q+m+Zg+zDNWdw24I/LTv+BKdZxWdne/6O/7t6/M8+GyNHAXT3z7hvQ/9t903eW2xfQXPfundfloDApEKaaRxMKg4YdwD/09b0DOGKixp72oxhwxkuHA07//YEXAm8DPgeM9837Cc2ls1sHDP93mv3vP9GclS4Z8Jjf7Zs+kua5+OH+5ZT5XQ4irQ5ONO957Aa+SvMqfVOpYTc9oT7Ffn/QEzi6nZB9u+zDS5icUZMOem2GGYXyKAa6hewVwNeBs2mOjB8FfhH4XeDTfX1bHxRK20SofBh4KVOfQR4F/AnwAM2T8Mc0AfY/gdP7+u6lCYj3lj7RM2/Sy266Bee3e8Y/eLD1A75Jc9Z2EU3QXlDa33SwHYrpDwqDguaw8sT51IB5P005cAGLgQtpfvhuun3kfPqCtWfeu8oTaFP5m08chMeAvxzQ/6+BN5TxX+G5v8/UH8hdDgpfB36bnjM64HiaJ/Zf9PV9HFhJEw69w3KaN6t7+75oivU+lp7nzFTrcLB5/X/Pvnkv7pu+D1gxRd89fdM76TkolbZ1NK+MHu5rv4Xyqq2nbRFwLfCTAY/V5SDS5eD0AmA18K/Kfrma8kpxQA2tT+CmeT71932IJie+X25PKO1H9vdtO3S+wygHWoZs6XsWcD3Ny84dNN/EXU/fNTY6HBT67vcrNC/PH5um5pcCp9OcHQy83k5zfa13mHgpdgJw7cF2oJ62gcFJc0nhyAH9XwXc2Nd2Os2Z3leBU2j+C9hT5Yn3C319uxwUrpvvfafU8U/Kk/OUFn1Pp3nV8hTwV8BrSvsY8M6+vl0OCkcDH6I52DwJPEETeB9i8iXIayaWO6C+z85yW3Q54Ly6w3IvnNhWA+Zd0Df934C3DOh3Dn3XvGme+ydMsdzXD2jrchBpfXDquI1bn8DR4YTsII93BPCKGdU6m51pVAMtQrYE1Zr+kGPwS6yzGHxQWHSw5dK80fVPp1pux3XqUm+n4Oy47FPb9KXDQaFrDQtlKNviLS22xT/juQeFV5f2SQeFnm0x7XJHvG69B5wneO4B5+hZLrvL/jZV33NnWUOXg0jrg1PHGlqfwNHhhGwk+8Nc7Xgz2Ii9Idv/CYShvEM/quUOeJwrhrXcATW3XnZZv10j2G5DW7853L9GtS3m/dMjXWuewXZru7/Ny37RZf1msy0Weg3PeYxRP8CQNlr/Na1O79DP9XIHPM7Qljubmg+17Tbifep5uy36ax7VdpuvbdFl/WazLRZ6Db3DgvlmbETcO9UsmmuLvQ7L8oWKzHwoIs4CboyIl5f+I19uR52WO8KaD7XtNkpVb4uONXcx
kv2tqy7rN6ptsRBqaGvBBD3Nyv4yzRtYvYLmzbBej0XEysy8ByAzfxQRbwU+CfT/t+BRLbeLrssdVc2H2nYbpdq3RZeauxjV/tZVl/Ub1bZYCDW0M+qXDB1evrT+BAId3qEf1XI7rlvXTxSMalscUtttxPtb1duiS80j3G4j2xYd/yaj2hbzXkPboZrfupEkDebPFEtS5Qx6SaqcQS9JlTPoJalyBr0kVe7/A/QkziA0YWcRAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1ac85a8e80>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "cluster_max_distances = []\n",
    "for i in range(num_cluster):\n",
    "    cluster_max_distances.append(np.max(distances[y_cluster == i]))\n",
    "pd.Series(cluster_max_distances).sort_values(ascending=False).plot.bar()  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "normal    82\n",
       "attack    21\n",
       "Name: label, dtype: int64"
      ]
     },
     "execution_count": 143,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.Series(df.label[distances>113]).value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAD8CAYAAAB5Pm/hAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAFXBJREFUeJzt3W9sXfWd5/H3FyexIaQNEINQ/myoJqqMXA1lrQ6r8mAcdlaFXQ08GKSaZYkak4BCrBllpQ3b+2A20poAD9KFu6tkk4addDX1TOhuIeLP7iLqatbqtjtmygDF05IhbWOFJWFCMm0skz9894FPMrYx8c2NnRufvl/S1Tnne3/33q8l+NyT3z1/IjORJJXXFY1uQJI0uwx6SSo5g16SSs6gl6SSM+glqeQMekkqOYNekkrOoJekkjPoJank5jW6AYAlS5bkypUrG92GJM0pr7322geZ2TrduMsi6FeuXMng4GCj25CkOSUiflHLOKduJKnkpg36iPh8RLw+7vH3EfFHEXFtRLwSEe8Uy2uK8RERT0fE/oh4IyJunf0/Q5L0aaYN+sz8aWbekpm3AP8YGAG+CzwKvJqZq4BXi22AO4FVxWM9sH02Gpck1eZCp27uAP42M38B3A3sKep7gHuK9buBb+WYHwKLI+LGGelWknTBLjTovwr0Fes3ZOZ7AMXy+qK+FDg47jXDRU2S1AA1B31ELAB+H3h2uqFT1D5xd5OIWB8RgxExeOTIkVrbkC6Jvr4+2tvbaWpqor29nb6+vulfJF2mLmSP/k7grzLz/WL7/bNTMsXycFEfBpaPe90y4NDkN8vMnZnZkZkdra3THgYqXTJ9fX1UKhWq1Sqjo6NUq1UqlYphrznrQoK+i3+YtgHYB6wp1tcAz4+rP1AcfXMbcPzsFI80F/T29rJ79246OzuZP38+nZ2d7N69m97e3ka3JtUlarlnbERcxdi8++cy83hRuw7YC6wAfgncm5lHIyKA/wh8hbEjdL6Wmec9G6qjoyM9YUqXi6amJkZHR5k/f/652qlTp2hpaeHMmTMN7EyaKCJey8yO6cbVdGZsZo4A102q/R1jR+FMHpvAIzX2KV122traGBgYoLOz81xtYGCAtra2BnYl1c8zY6VJKpUK3d3d9Pf3c+rUKfr7++nu7qZSqTS6Nakul8W1bqTLSVdXFwA9PT0MDQ3R1tZGb2/vubo019Q0Rz/bnKOXpAtX6xy9UzeSVHIGvSSVnEEvSSVn0EtSyRn0klRyBr0klZxBL0klZ9BLUskZ9JJUcga9JJWcQS9JJWfQS1LJGfTSFHp6emhpaSEiaGlpoaenp9EtSXUz6KVJenp62LFjB4899hgnTpzgscceY8eOHYa95iwvUyxN0tLSwmOPPcamTZvO1bZt28bXv/51RkdHG9iZNFGtlyk26KVJIoITJ05w1VVXnauNjIywcOFCLof/X6SzvB69VKfm5mZ27NgxobZjxw6am5sb1JF0cWoK+ohYHBHfiYi/iYihiPgnEXFtRLwSEe8Uy2uKsRERT0fE/oh4IyJund0/QZpZ69atY/PmzWzbto2RkRG2bdvG5s2bWbduXaNbk+pS09RNROwB/ndmfjMiFgBXAV8Hjmbm4xHxKHBNZm6OiLuAHuAu4HeApzLzd873/k7d6HLT09PDrl27+Oijj2hubmbdunVUq9VGtyVNMGNz9BHxGeCvgc/luMER8VPgdzPzvYi4Efh+Zn4+Iv5zsd43edynfYZBL0kXbibn6D8HHAH+S0T8OCK+GRELgRvOhnexvL4YvxQ4OO71w0VNktQAtQT9POBWYHtmfhE4ATx6nvExRe0T/2yIiPURMRgRg0eOHKmpWUnShasl6IeB4cz8UbH9HcaC//1iyoZieXjc+OXjXr8MODT5TTNzZ2Z2ZGZHa2trvf1LkqYxbdBn5v8DDkbE54vSHcDbwD5gTVFbAzxf
rO8DHiiOvrkNOH6++XlJ0uyaV+O4HuBPiyNu3gW+xtiXxN6I6AZ+CdxbjH2JsSNu9gMjxVhJUoPUFPSZ+Tow1S+7d0wxNoFHLrIvSdIM8cxYSSo5g16SSs6gl6SSM+glqeQMekkqOYNekkrOoJekkjPoJankDHpJKjmDXpJKzqCXpJIz6CWp5Ax6SSo5g16SSs6gl6SSM+glqeQMekkqOYNekkrOoJekkjPoJankagr6iPh5RLwZEa9HxGBRuzYiXomId4rlNUU9IuLpiNgfEW9ExK2z+QdIks7vQvboOzPzlszsKLYfBV7NzFXAq8U2wJ3AquKxHtg+U81Kki7cxUzd3A3sKdb3APeMq38rx/wQWBwRN17E50iSLkKtQZ/A/4qI1yJifVG7ITPfAyiW1xf1pcDBca8dLmqSpAaYV+O4L2fmoYi4HnglIv7mPGNjilp+YtDYF8Z6gBUrVtTYhiTpQtW0R5+Zh4rlYeC7wJeA989OyRTLw8XwYWD5uJcvAw5N8Z47M7MjMztaW1vr/wskSec1bdBHxMKIWHR2HfhnwFvAPmBNMWwN8Hyxvg94oDj65jbg+NkpHknSpVfL1M0NwHcj4uz4b2fm/4iIvwT2RkQ38Evg3mL8S8BdwH5gBPjajHctSarZtEGfme8Cvz1F/e+AO6aoJ/DIjHQnSbponhkrSSVn0EtSyRn0klRyBr0klZxBL0klZ9BLUskZ9JJUcga9JJWcQS9JJWfQS1Po6+ujvb2dpqYm2tvb6evra3RLUt1qvUyx9Bujr6+PSqXC7t27uf322xkYGKC7uxuArq6uBncnXbgYuzRNY3V0dOTg4GCj25AAaG9vp1qt0tnZea7W399PT08Pb731VgM7kyaKiNfG3d7108cZ9NJETU1NjI6OMn/+/HO1U6dO0dLSwpkzZxrYmTRRrUHvHL00SVtbGwMDAxNqAwMDtLW1Nagj6eIY9NIklUqF7u5u+vv7OXXqFP39/XR3d1OpVBrdmlQXf4yVJjn7g2tPTw9DQ0O0tbXR29vrD7Gas5yjl6Q5yjl66SJ4HL3KxKkbaZK+vj4eeughRkdH+fjjj/nZz37GQw89BHgcveYm9+ilSTZu3MjIyAiPP/44J06c4PHHH2dkZISNGzc2ujWpLjUHfUQ0RcSPI+KFYvumiPhRRLwTEX8eEQuKenOxvb94fuXstC7NjqNHj7J161Y2bdrEVVddxaZNm9i6dStHjx5tdGtSXS5kj/4PgaFx208A38jMVcCHQHdR7wY+zMzfAr5RjJPmlPb29vNuS3NJTUEfEcuAfw58s9gOYDXwnWLIHuCeYv3uYpvi+TuK8dKcMG/ePO6///4Jx9Hff//9zJvnT1qam2rdo/8PwL8BPi62rwOOZebpYnsYWFqsLwUOAhTPHy/GTxAR6yNiMCIGjxw5Umf70sx7+OGHOXbsGF1dXTQ3N9PV1cWxY8d4+OGHG92aVJdpgz4i/gVwODNfG1+eYmjW8Nw/FDJ3ZmZHZna0trbW1Kx0KVSrVTZs2MCxY8fITI4dO8aGDRuoVquNbk2qSy3/Fv0y8PsRcRfQAnyGsT38xRExr9hrXwYcKsYPA8uB4YiYB3wW8FcszSnVatVgV2lMu0efmf82M5dl5krgq8D3MvNfAv3AHxTD1gDPF+v7im2K57+Xl8Ppt5L0G+pijqPfDGyKiP2MzcHvLuq7geuK+ibg0YtrUZJ0MS7oMILM/D7w/WL9XeBLU4wZBe6dgd4kSTPAM2MlqeQMemkKXtRMZeIZINIk3hxcZeP16KVJvDm45gpvDi7VyZuDa67wxiNSndra2tiyZcuEOfotW7Z4c3DNWQa9NElnZydPPPEEa9eu5Ve/+hVr167liSeemDCVI80lBr00SX9/P5s3b+aZZ55h0aJFPPPMM2zevJn+/v5GtybVxTl6aRLn6DVXOEcv1amtrY2BgYEJtYGBAefoNWcZ9NIklUqF7u7uCTce6e7uplKpNLo1qS6e
MCVNcvakqJ6eHoaGhmhra6O3t9eTpTRnOUcvSXOUc/SSJMCgl6TSM+glqeQMekkqOYNekkrOoJekkps26COiJSL+b0T8dUT8JCK2FPWbIuJHEfFORPx5RCwo6s3F9v7i+ZWz+ydIks6nlj36j4DVmfnbwC3AVyLiNuAJ4BuZuQr4EOguxncDH2bmbwHfKMZJc4q3ElSZTBv0OebXxeb84pHAauA7RX0PcE+xfnexTfH8HRERM9axNMvO3kqwWq0yOjpKtVqlUqkY9pqzapqjj4imiHgdOAy8AvwtcCwzTxdDhoGlxfpS4CBA8fxx4LqZbFqaTb29vdx333309PTQ0tJCT08P9913H729vY1uTapLTde6ycwzwC0RsRj4LjDVZfzOXkthqr33T1xnISLWA+sBVqxYUVOz0qXw9ttvMzIy8ombg//85z9vdGtSXS7oqJvMPAZ8H7gNWBwRZ78olgGHivVhYDlA8fxngaNTvNfOzOzIzI7W1tb6updmwYIFC9i4cSOdnZ3Mnz+fzs5ONm7cyIIFCxrdmlSXWo66aS325ImIK4F/CgwB/cAfFMPWAM8X6/uKbYrnv5eXw5XTpBqdPHmSarU64TLF1WqVkydPNro1qS61TN3cCOyJiCbGvhj2ZuYLEfE28GcR8e+BHwO7i/G7gf8aEfsZ25P/6iz0Lc2am2++mZGREVavXn2udtNNN3HzzTc3sCupftMGfWa+AXxxivq7wJemqI8C985Id1IDXHHFFRw4cICrr76aX//611x99dUcOHCAL3zhC41uTaqLZ8ZKk7z55pu0tLSwZMkSrrjiCpYsWUJLSwtvvvlmo1uT6mLQS1PYu3cvBw4c4MyZMxw4cIC9e/c2uiWpbga9NIUXXnjhvNvSXGLQS5MsXLiQnTt3smHDBo4fP86GDRvYuXMnCxcubHRrUl0MemmSXbt20dLSwvbt21m8eDHbt2+npaWFXbt2Nbo1qS4GvTRJV1cXDz74IM3NzQA0Nzfz4IMP0tXV1eDOpPoY9NIkfX19vPjii7z88sucPHmSl19+mRdffNGLmmnOisvhpNWOjo4cHBxsdBsSAO3t7VSrVTo7O8/V+vv76enp4a233mpgZ9JEEfFaZnZMO86glyZqampidHSU+fPnn6udOnWKlpYWzpw508DOpIlqDXqnbqRJ2traGBgYmFAbGBigrW2qi7ZKlz+DXpqkUqnQ3d094aJm3d3dVCqVRrcm1aWm69FLv0m6urr4wQ9+wJ133slHH31Ec3Mz69at86gbzVnu0UuTeNSNysYfY6VJPOpGc4VH3Uh18qgbzRUedSPVqa2tjS1bttDe3k5TUxPt7e1s2bLFo240Zxn00iSdnZ1s3bqVDz74gMzkgw8+YOvWrROmcqS5xKCXJnnuuedYtGgRV155JQBXXnklixYt4rnnnmtwZ1J9DHppkuHhYZ599lkOHDjAxx9/zIEDB3j22WcZHh5udGtSXQx6SSq5aYM+IpZHRH9EDEXETyLiD4v6tRHxSkS8UyyvKeoREU9HxP6IeCMibp3tP0KaScuWLWPNmjUTzoxds2YNy5Yta3RrUl1q2aM/DfzrzGwDbgMeiYibgUeBVzNzFfBqsQ1wJ7CqeKwHts9419IsevLJJzl9+jRr166lpaWFtWvXcvr0aZ588slGtybVZdqgz8z3MvOvivVfAUPAUuBuYE8xbA9wT7F+N/CtHPNDYHFE3DjjnUuzpKuri6eeeurcrQMXLlzIU0895SUQNGdd0LVuImIl8EXgR8ANmfkejH0ZRMT1xbClwMFxLxsuau9dbLPSpdLV1WWwqzRq/jE2Iq4G/hvwR5n59+cbOkXtE6ffRsT6iBiMiMEjR47U2oZ0SfT19U04Ycrr3GguqynoI2I+YyH/p5n534vy+2enZIrl4aI+DCwf9/JlwKHJ75mZOzOzIzM7Wltb6+1fmnF9fX1UKhWq1Sqjo6NUq1UqlYphrzmrlqNuAtgNDGXmtnFP
7QPWFOtrgOfH1R8ojr65DTh+dopHmgt6e3vZvXs3nZ2dzJ8/n87OTnbv3k1vb2+jW5PqUsse/ZeBfwWsjojXi8ddwOPA70XEO8DvFdsALwHvAvuBXcCGmW9bmj1DQ0MMDw9PmLoZHh5maGio0a1JdfHqldIky5cv5/Tp03z729/m9ttvZ2BggPvuu4958+Zx8ODB6d9AukRqvXqld5iSpnD06FFWr159bnvBggVcf/3153mFdPky6KVJprqmzcmTJ73WjeYsr3UjSSVn0EtSyRn00qe45pprJiylucqglz7Fhx9+OGEpzVUGvSSVnEEvSSVn0EtSyRn0klRyBr0klZxBL0klZ9BLUskZ9JJUcga9JJWcQS9JJWfQS1LJGfSSVHIGvSSVnEEvSSU3bdBHxDMRcTgi3hpXuzYiXomId4rlNUU9IuLpiNgfEW9ExK2z2bwkaXq17NH/CfCVSbVHgVczcxXwarENcCewqnisB7bPTJuSpHpNG/SZ+RfA0Unlu4E9xfoe4J5x9W/lmB8CiyPixplqVpJ04eqdo78hM98DKJbXF/WlwMFx44aL2idExPqIGIyIwSNHjtTZhiRpOjP9Y2xMUcupBmbmzszsyMyO1tbWGW5DknRWvUH//tkpmWJ5uKgPA8vHjVsGHKq/PUnSxao36PcBa4r1NcDz4+oPFEff3AYcPzvFI0lqjHnTDYiIPuB3gSURMQz8MfA4sDciuoFfAvcWw18C7gL2AyPA12ahZ0nSBZg26DOz61OeumOKsQk8crFNSZJmjmfGSlLJGfSSVHIGvSSVnEEvSSVn0EtSyRn0klRyBr0klZxBL0klZ9BLUskZ9JJUcga9JJWcQS9JJWfQS1LJGfSSVHIGvSSVnEEvSSVn0EtSyRn0klRyBr0kldysBH1EfCUifhoR+yPi0dn4DElSbWY86COiCfhPwJ3AzUBXRNw8058jSarNbOzRfwnYn5nvZuZJ4M+Au2fhcyRJNZiNoF8KHBy3PVzUJEkNMG8W3jOmqOUnBkWsB9YDrFixYhbaUOn9u8/OytvmH3/mkn/m2Hsfn7331m+02Qj6YWD5uO1lwKHJgzJzJ7AToKOj4xNfBNK0ZikYI6baVxmT6X+qmntmY+rmL4FVEXFTRCwAvgrsm4XPkSTVYMb36DPzdERsBP4n0AQ8k5k/menPkWZLZk65V+/evOaq2Zi6ITNfAl6ajfeWLgVDXWXimbGSVHIGvSSVnEEvSSVn0EtSyRn0klRycTkcXRARR4BfNLoPaQpLgA8a3YT0Kf5RZrZON+iyCHrpchURg5nZ0eg+pIvh1I0klZxBL0klZ9BL57ez0Q1IF8s5ekkqOffoJankDHppChHxTEQcjoi3Gt2LdLEMemlqfwJ8pdFNSDPBoJemkJl/ARxtdB/STDDoJankDHpJKjmDXpJKzqCXpJIz6KUpREQf8H+Az0fEcER0N7onqV6eGStJJecevSSVnEEvSSVn0EtSyRn0klRyBr0klZxBL0klZ9BLUskZ9JJUcv8f1TT74kRMkvEAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1ac1906860>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.boxplot(distances);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "metadata": {},
   "outputs": [],
   "source": [
    "def outliers(distances):\n",
    "    q1, q3 = np.percentile(distances, [0.25, 0.75])\n",
    "    iqr = q3-q1\n",
    "    upper_whisker = q3 + 1.5 * iqr\n",
    "    lower_whisker = q1 - 1.5 * iqr\n",
    "    return (distances > upper_whisker) | (distances < lower_whisker)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False    2610187\n",
       "True     2288244\n",
       "dtype: int64"
      ]
     },
     "execution_count": 166,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.Series(outliers(distances)).value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2806407,)"
      ]
     },
     "execution_count": 122,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.sqrt(np.sum((X_pca[y_cluster==i] - centroid)**2, axis = 1)).shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 175,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(44016, 25)"
      ]
     },
     "execution_count": 175,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_pca[y_cluster==i].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
